diff --git a/README.md b/README.md index 568394cf3..dccea57d6 100644 --- a/README.md +++ b/README.md @@ -237,6 +237,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t - `metric` (list), the metrics you want to use for your evaluation (see next section for a detailed explanation) - `output_regex` (str), A regex string that will be used to filter your generation. (Generative metrics will only select tokens that are between the first and the second sequence matched by the regex. For example, for a regex matching `\n` and a generation `\nModel generation output\nSome other text` the metric will only be fed with `Model generation output`) - `frozen` (bool), for now is set to False, but we will steadily pass all stable tasks to True. +- `trust_dataset` (bool), set to True if you trust the dataset (the value is passed to `load_dataset` as `trust_remote_code`). Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`. diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 57eb1fd2a..889ebcec9 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -49,6 +49,7 @@ def __init__( stop_sequence=None, output_regex=None, frozen=False, + trust_dataset=True, ) @@ -115,6 +116,7 @@ def __init__( stop_sequence=None, output_regex=None, frozen=False, + trust_dataset=True, ) @@ -145,6 +147,7 @@ def acva(line, task_name: str = None): few_shots_split="validation", few_shots_select="sequential", metric=["loglikelihood_acc"], + trust_dataset=True, ) diff --git a/pyproject.toml b/pyproject.toml index 4b746a110..da08cc692 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ keywords = ["evaluation", "nlp", "llm"] dependencies = [ # Base dependencies "transformers>=4.38.0", - "huggingface_hub==0.20.3", + "huggingface_hub>=0.21.2", "torch>=2.0", "GitPython==3.1.31", # for logging "datasets>=2.14.0", diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index 
84bb74672..ac153a435 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import TYPE_CHECKING, List, Optional, Tuple, Union -from datasets import load_dataset +from datasets import DownloadMode, load_dataset from lighteval.few_shot_manager import FewShotSampler from lighteval.logging.hierarchical_logger import hlog, hlog_warn @@ -62,7 +62,7 @@ class LightevalTaskConfig: truncated_num_docs (bool): Whether less than the total number of documents were used output_regex (str) frozen (bool) - + trust_dataset (bool): Whether to trust the dataset at execution or not """ name: str @@ -84,6 +84,8 @@ class LightevalTaskConfig: original_num_docs: int = -1 effective_num_docs: int = -1 + trust_dataset: bool = None + def as_dict(self): return { "name": self.name, @@ -144,6 +146,7 @@ def __init__(self, name: str, cfg: LightevalTaskConfig, cache_dir: Optional[str] self.dataset_path = self.hf_repo self.dataset_config_name = self.hf_subset self.dataset = None # Delayed download + self.trust_dataset = cfg.trust_dataset hlog(f"{self.dataset_path} {self.dataset_config_name}") self._fewshot_docs = None self._docs = None @@ -521,14 +524,10 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int = """ if dataset_loading_processes <= 1: - datasets = [ - download_dataset_worker((task.dataset_path, task.dataset_config_name)) for task in tasks - ] # Also help us with gdb + datasets = [download_dataset_worker(task) for task in tasks] # Also help us with gdb else: with Pool(processes=dataset_loading_processes) as pool: - datasets = pool.map( - download_dataset_worker, [(task.dataset_path, task.dataset_config_name) for task in tasks] - ) + datasets = pool.map(download_dataset_worker, tasks) for task, dataset in zip(tasks, datasets): task.dataset = dataset @@ -539,13 +538,14 @@ def download_dataset_worker(args): Worker function to download a dataset from the HuggingFace Hub. 
Used for parallel dataset loading. """ - dataset_path, dataset_config_name = args + task: LightevalTask = args dataset = load_dataset( - path=dataset_path, - name=dataset_config_name, + path=task.dataset_path, + name=task.dataset_config_name, data_dir=None, cache_dir=None, - download_mode=None, + download_mode=DownloadMode.FORCE_REDOWNLOAD, # None + trust_remote_code=task.trust_dataset, ) return dataset diff --git a/src/lighteval/tasks/tasks_table.jsonl b/src/lighteval/tasks/tasks_table.jsonl index 69431d2ec..00a718acd 100644 --- a/src/lighteval/tasks/tasks_table.jsonl +++ b/src/lighteval/tasks/tasks_table.jsonl @@ -1,1147 +1,1147 @@ -{"name":"abstract_narrative_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"abstract_narrative_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"anachronisms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"anachronisms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"analogical_similarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analogical_similarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"analytic_entailment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analytic_entailment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"anli","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","train_r2","dev_r2","train_r3","dev_r3","test_r1","test_r2","test_r3"],"evaluation_splits":["test_r1","test_r2","test_r3"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"anli:r1","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","test_r1"],"evaluation_splits":["test_r1"],"few_shots_split":"train_r1","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"anli:r2","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r2","dev_r2","test_r2"],"evaluation_splits":["test_r2"],"few_shots_split":"train_r2","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"anli:r3","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r3","dev_r3","test_r3"],"evaluation_splits":["test_r3"],"few_shots_split":"train_r3","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arc:c:letters","suite":["original","arc"],"prompt_function":"arc_with_options_letters_predict","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arc:c:options","suite":["original","arc"],"prompt_function":"arc_with_options","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arc:c:simple","suite":["original","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arc:challenge","suite":["lighteval","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"arc:easy","suite":["lighteval","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Easy","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:1dc","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_1dc","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:2da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:2dm","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2dm","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:2ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"arithmetic:3da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:3ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:4da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:4ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic:5da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"arithmetic:5ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"arithmetic_bb","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"ascii_word_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ascii_word_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"asdiv","suite":["lighteval"],"prompt_function":"asdiv","hf_repo":"EleutherAI\/asdiv","hf_subset":"asdiv","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"authorship_verification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"authorship_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"auto_categorization","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"auto_categorization","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"auto_debugging","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"auto_debugging","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+"} -{"name":"babi_qa","suite":["helm"],"prompt_function":"babi_qa","hf_repo":"facebook\/babi_qa","hf_subset":"en-valid-qa1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bbq:Age","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Age","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Disability_status","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Disability_status","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Gender_identity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Gender_identity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Nationality","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Nationality","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bbq:Physical_appearance","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Physical_appearance","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Race_ethnicity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_ethnicity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Race_x_SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Race_x_gender","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bbq:Religion","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Religion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq:Sexual_orientation","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Sexual_orientation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bbq_lite_json","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"bbq_lite_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:auto_debugging","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"auto_debugging","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:age_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:age_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:disability_status_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:bbq_lite_json:disability_status_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:gender_identity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:gender_identity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:bbq_lite_json:nationality_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:nationality_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:physical_appearance_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:bbq_lite_json:physical_appearance_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:race_ethnicity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:race_ethnicity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:bbq_lite_json:religion_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:religion_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:ses_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:ses_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:bbq_lite_json:sexual_orientation_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:bbq_lite_json:sexual_orientation_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:code_line_description","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"code_line_description","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conceptual_combinations:contradictions","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-contradictions","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conceptual_combinations:emergent_properties","suite":["helm"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-emergent_properties","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conceptual_combinations:fanciful_fictional_combinations","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-fanciful_fictional_combinations","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conceptual_combinations:homonyms","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-homonyms","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conceptual_combinations:invented_words","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-invented_words","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:adna_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:adna_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conlang_translation:atikampe_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:atikampe_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:gornam_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:gornam_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conlang_translation:holuan_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:holuan_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:mkafala_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:mkafala_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conlang_translation:postpositive_english_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:postpositive_english_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:unapuri_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:unapuri_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:conlang_translation:vaomi_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:conlang_translation:vaomi_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:emoji_movie","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"emoji_movie","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:formal_fallacies_syllogisms_negation","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:hindu_knowledge","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"hindu_knowledge","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:known_unknowns","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"known_unknowns","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:language_identification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"language_identification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:linguistics_puzzles","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"linguistics_puzzles","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:logic_grid_puzzle","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logic_grid_puzzle","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:logical_deduction-five_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-five_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:logical_deduction-seven_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-seven_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:logical_deduction-three_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-three_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:misconceptions_russian","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"misconceptions_russian","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:novel_concepts","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"novel_concepts","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:operators","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"operators","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:parsinlu_reading_comprehension","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:play_dialog_same_or_different","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:repeat_copy_logic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"repeat_copy_logic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:strange_stories-boolean","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-boolean","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:strange_stories-multiple_choice","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-multiple_choice","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:strategyqa","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strategyqa","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:symbol_interpretation-adversarial","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-adversarial","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:symbol_interpretation-emoji_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-emoji_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:symbol_interpretation-name_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-name_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bigbench:symbol_interpretation-plain","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-plain","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:symbol_interpretation-tricky","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-tricky","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:vitaminc_fact_verification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bigbench:winowhy","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"winowhy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:adjunct_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:adjunct_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:anaphor_gender_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:anaphor_gender_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:anaphor_number_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:anaphor_number_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:animate_subject_passive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:animate_subject_passive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:animate_subject_trans","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:animate_subject_trans","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:causative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:causative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:complex_NP_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:complex_NP_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:determiner_noun_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:distractor_agreement_relational_noun","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:distractor_agreement_relational_noun","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:distractor_agreement_relative_clause","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:distractor_agreement_relative_clause","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:drop_argument","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:drop_argument","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:ellipsis_n_bar_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:ellipsis_n_bar_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:ellipsis_n_bar_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:ellipsis_n_bar_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:existential_there_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:existential_there_subject_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:existential_there_subject_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:expletive_it_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:expletive_it_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:inchoative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:inchoative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:intransitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:intransitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_past_participle_adjectives","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_past_participle_adjectives","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_past_participle_verbs","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:irregular_past_participle_verbs","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:left_branch_island_echo_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:left_branch_island_echo_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:left_branch_island_simple_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:left_branch_island_simple_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:matrix_question_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:matrix_question_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:npi_present_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:npi_present_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:npi_present_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:npi_present_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:only_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:only_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:only_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:only_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:passive_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:passive_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:passive_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:passive_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:principle_A_c_command","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_c_command","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_case_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_case_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_case_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:principle_A_case_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_domain_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_domain_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_domain_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_domain_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:principle_A_domain_3","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_domain_3","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_reconstruction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:principle_A_reconstruction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:sentential_negation_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:sentential_negation_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:sentential_negation_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:sentential_negation_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:sentential_subject_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:sentential_subject_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:superlative_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:superlative_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:superlative_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:superlative_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:tough_vs_raising_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:tough_vs_raising_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:tough_vs_raising_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:tough_vs_raising_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:transitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:transitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:wh_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_questions_object_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_questions_object_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_questions_subject_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:wh_questions_subject_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_no_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_no_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_with_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_with_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bold","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bold:gender","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bold:political_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"political_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bold:profession","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"profession","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"bold:race","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"race","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bold:religious_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"religious_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"boolq","suite":["helm","helm_general"],"prompt_function":"boolq_helm","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"boolq:contrastset","suite":["helm"],"prompt_function":"boolq_helm_contrastset","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"bridging_anaphora_resolution_barqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"bridging_anaphora_resolution_barqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"causal_judgment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"causal_judgment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"cause_and_effect","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cause_and_effect","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"checkmate_in_one","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"checkmate_in_one","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"chess_state_tracking","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chess_state_tracking","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"chinese_remainder_theorem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chinese_remainder_theorem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"cifar10_classification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cifar10_classification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments","suite":["helm","helm_general"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:LGBTQ","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"LGBTQ","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:black","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"black","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"civil_comments:christian","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"christian","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:female","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"female","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:male","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"male","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:muslim","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"muslim","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"civil_comments:other_religions","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"other_religions","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"civil_comments:white","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"white","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"code_line_description","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"code_line_description","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"codenames","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"codenames","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"color","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"color","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"common_morpheme","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"common_morpheme","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"commonsenseqa","suite":["helm","commonsense_scenario"],"prompt_function":"commonsense_qa","hf_repo":"commonsense_qa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"conceptual_combinations","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conceptual_combinations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"conlang_translation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conlang_translation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge_t5","bleu","perfect_exact_match"],"stop_sequence":[".",";","!","?"],"output_regex":"[^\\.\\?\\!\\;\\n]+"} -{"name":"contextual_parametric_knowledge_conflicts","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"contextual_parametric_knowledge_conflicts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"copyright:oh_the_places","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"oh_the_places","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:pilot","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"pilot","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:popular_books-prefix_length_10","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_10","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:popular_books-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:popular_books-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"copyright:popular_books-prefix_length_250","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_250","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:popular_books-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:popular_books-prefix_length_50","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_50","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:prompt_num_line_1-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_1-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"copyright:prompt_num_line_10-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_10-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"copyright:prompt_num_line_5-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_5-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"coqa","suite":["lighteval"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"coqa_bb","suite":["lighteval","bigbench_programmatic","bigbench"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"covid_dialogue","suite":["helm"],"prompt_function":"covid_dialogue","hf_repo":"lighteval\/covid_dialogue","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"crash_blossom","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crash_blossom","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"crass_ai","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crass_ai","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"cryobiology_spanish","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryobiology_spanish","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"cryptonite","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryptonite","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"cs_algorithms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cs_algorithms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"dark_humor_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dark_humor_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"date_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"date_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"disambiguation_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disambiguation_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"discourse_marker_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"discourse_marker_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"disfl_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disfl_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"drop","suite":["lighteval"],"prompt_function":"drop","hf_repo":"lighteval/drop_harness","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":"train","few_shots_select":"random_sampling_from_train","generation_size":null,"metric":["drop"],"stop_sequence":["."],"output_regex":null,"frozen":false} -{"name":"dyck_language:2","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"dyck_language:3","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"3","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"dyck_language:4","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"4","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"dyck_languages","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dyck_languages","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"elementary_math_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"elementary_math_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"emoji_movie","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"emoji_movie","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"emojis_emotion_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"emojis_emotion_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"empirical_judgments","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"empirical_judgments","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"english_russian_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_russian_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entailed_polarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entailed_polarity_hindi","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity_hindi","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"entity_data_imputation:Buy","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Buy","hf_subset":"default","hf_avail_splits":["train","test","valid"],"evaluation_splits":["valid","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_data_imputation:Restaurant","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Restaurant","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Abt_Buy","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Abt_Buy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Amazon_Google","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Amazon_Google","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"entity_matching:Beer","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Beer","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Company","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Company","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"entity_matching:Dirty_DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Dirty_DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Dirty_Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Dirty_iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"entity_matching:Fodors_Zagats","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Fodors_Zagats","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"entity_matching:iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"epistemic_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"epistemic_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"ethics:commonsense","suite":["lighteval","ethics"],"prompt_function":"ethics_commonsense","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"commonsense","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"ethics:deontology","suite":["lighteval","ethics"],"prompt_function":"ethics_deontology","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"deontology","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"ethics:justice","suite":["lighteval","ethics"],"prompt_function":"ethics_justice","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"justice","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"ethics:utilitarianism","suite":["lighteval","ethics"],"prompt_function":"ethics_utilitarianism","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"utilitarianism","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"ethics:virtue","suite":["lighteval","ethics"],"prompt_function":"ethics_virtue","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"virtue","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"evaluating_information_essentiality","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"evaluating_information_essentiality","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"fact_checker","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fact_checker","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"fantasy_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fantasy_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"few_shot_nlg","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"few_shot_nlg","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"figure_of_speech_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"figure_of_speech_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"formal_fallacies_syllogisms_negation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"gem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"gender_inclusive_sentences_german","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gender_inclusive_sentences_german","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"general_knowledge","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"general_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"geometric_shapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"geometric_shapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:cola","suite":["lighteval","glue"],"prompt_function":"cola","hf_repo":"glue","hf_subset":"cola","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", "mcc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:mnli","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_matched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:mnli_mismatched","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_mismatched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"glue:mrpc","suite":["lighteval","glue"],"prompt_function":"mrpc","hf_repo":"glue","hf_subset":"mrpc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:qnli","suite":["lighteval","glue"],"prompt_function":"qnli","hf_repo":"glue","hf_subset":"qnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:qqp","suite":["lighteval","glue"],"prompt_function":"qqp","hf_repo":"glue","hf_subset":"qqp","hf_avail_splits":["train","validation","test"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:rte","suite":["lighteval","glue"],"prompt_function":"rte","hf_repo":"glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:sst2","suite":["lighteval","glue"],"prompt_function":"sst","hf_repo":"glue","hf_subset":"sst2","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"glue:stsb","suite":["lighteval","glue"],"prompt_function":"stsb","hf_repo":"glue","hf_subset":"stsb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"glue:wnli","suite":["lighteval","glue"],"prompt_function":"wnli","hf_repo":"glue","hf_subset":"wnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"goal_step_wikihow","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"goal_step_wikihow","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"gpqa","suite":["lighteval"],"prompt_function":"gpqa","hf_repo":"Idavidrein/gpqa","hf_subset":"gpqa_main","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"gre_reading_comprehension","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gre_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"gsm8k","suite":["lighteval"],"prompt_function":"gsm8k","hf_repo":"gsm8k","hf_subset":"main","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":256,"metric":["quasi_exact_match_gsm8k"],"stop_sequence":[":","Question:", "Question"],"output_regex":null,"frozen":false} -{"name":"headqa:en","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"headqa:es","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"es","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"hellaswag","suite":["lighteval"],"prompt_function":"hellaswag_harness","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"hellaswag","suite":["helm","helm_general"],"prompt_function":"hellaswag_helm","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"hhh_alignment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hhh_alignment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"hindi_question_answering","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hindi_question_answering","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"hindu_knowledge","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"hindu_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"hinglish_toxicity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hinglish_toxicity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"human_organs_senses","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"human_organs_senses","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"humaneval","suite":["helm","code_scenario"],"prompt_function":"humaneval","hf_repo":"openai_humaneval","hf_subset":"openai_humaneval","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":600,"metric":["code_humaneval"],"stop_sequence":["\nclass","\ndef","\nif","\nprint"],"output_regex":null,"frozen":false} -{"name":"hyperbaton","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hyperbaton","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"identify_math_theorems","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_math_theorems","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"identify_odd_metaphor","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_odd_metaphor","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"imdb","suite":["helm","helm_general"],"prompt_function":"imdb","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"imdb:contrastset","suite":["helm"],"prompt_function":"imdb_contrastset","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"implicatures","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicatures","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"implicit_relations","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicit_relations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"intent_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intent_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"interactive_qa_mmlu:abstract_algebra","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_abstract_algebra","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"interactive_qa_mmlu:college_chemistry","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_college_chemistry","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"interactive_qa_mmlu:global_facts","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_global_facts","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"interactive_qa_mmlu:miscellaneous","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_miscellaneous","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"interactive_qa_mmlu:nutrition","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_nutrition","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"interactive_qa_mmlu:us_foreign_policy","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_us_foreign_policy","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"international_phonetic_alphabet_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"international_phonetic_alphabet_transliterate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_transliterate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"intersect_geometry","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intersect_geometry","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"irony_identification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"irony_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"iwslt17:ar-en","suite":["lighteval","harness_selection"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:en-ar","suite":["lighteval","harness_selection"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"iwslt17:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:en-ko","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ko","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"iwslt17:ko-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ko-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"iwslt17:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"kanji_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kanji_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"kannada","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kannada","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"key_value_maps","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"key_value_maps","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"known_unknowns","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"known_unknowns","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:standard","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:standard_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai:de","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lambada:openai:en","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai:es","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai:fr","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai:it","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"it","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lambada:openai_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"language_games","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_games","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"language_identification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"legal_summarization:billsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"BillSum","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1024,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"legal_summarization:eurlexsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"EurLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"legal_summarization:multilexsum","suite":["helm"],"prompt_function":"multilexsum","hf_repo":"lighteval\/legal_summarization","hf_subset":"MultiLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":256,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"legalsupport","suite":["helm"],"prompt_function":"legal_support","hf_repo":"lighteval\/LegalSupport","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lexglue:case_hold","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_case_hold","hf_repo":"lighteval\/lexglue","hf_subset":"case_hold","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lexglue:ecthr_a","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_a","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_a","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lexglue:ecthr_b","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_b","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_b","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lexglue:eurlex","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_eurlex","hf_repo":"lighteval\/lexglue","hf_subset":"eurlex","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lexglue:ledgar","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ledgar","hf_repo":"lighteval\/lexglue","hf_subset":"ledgar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lexglue:scotus","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_scotus","hf_repo":"lighteval\/lexglue","hf_subset":"scotus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lexglue:unfair_tos","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_unfair_tos","hf_repo":"lighteval\/lexglue","hf_subset":"unfair_tos","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:brazilian_court_decisions_judgment","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_judgment","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_judgment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:brazilian_court_decisions_unanimity","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_unanimity","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_unanimity","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lextreme:covid19_emergency_event","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_covid19_emergency_event","hf_repo":"lighteval\/lextreme","hf_subset":"covid19_emergency_event","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:german_argument_mining","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_german_argument_mining","hf_repo":"lighteval\/lextreme","hf_subset":"german_argument_mining","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:greek_legal_code_chapter","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_chapter","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_chapter","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lextreme:greek_legal_code_subject","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_subject","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_subject","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:greek_legal_code_volume","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_volume","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_volume","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:greek_legal_ner","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_ner","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_ner","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":430,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:legalnero","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_legalnero","hf_repo":"lighteval\/lextreme","hf_subset":"legalnero","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":788,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lextreme:lener_br","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_lener_br","hf_repo":"lighteval\/lextreme","hf_subset":"lener_br","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":338,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:mapa_coarse","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_coarse","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_coarse","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:mapa_fine","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_fine","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_fine","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:multi_eurlex_level_1","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_1","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lextreme:multi_eurlex_level_2","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_2","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_2","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:multi_eurlex_level_3","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_3","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_3","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:online_terms_of_service_clause_topics","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_clause_topics","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_clause_topics","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lextreme:online_terms_of_service_unfairness_levels","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_unfairness_levels","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_unfairness_levels","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lextreme:swiss_judgment_prediction","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_swiss_judgment_prediction","hf_repo":"lighteval\/lextreme","hf_subset":"swiss_judgment_prediction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"linguistic_mappings","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"linguistic_mappings","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"linguistics_puzzles","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"linguistics_puzzles","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+"} 
-{"name":"logic_grid_puzzle","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logic_grid_puzzle","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"logical_args","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_args","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"logical_deduction","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"logical_deduction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"logical_fallacy_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_fallacy_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"logical_sequence","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_sequence","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"logiqa","suite":["lighteval"],"prompt_function":"logiqa","hf_repo":"lighteval/logiqa_harness","hf_subset":"logiqa","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lsat_qa","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"all","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lsat_qa:assignment","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"assignment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lsat_qa:grouping","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"grouping","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"lsat_qa:miscellaneous","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"miscellaneous","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"lsat_qa:ordering","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"ordering","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mathematical_induction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mathematical_induction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mathqa","suite":["lighteval"],"prompt_function":"mathqa","hf_repo":"math_qa","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"matrixshapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"matrixshapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"me_q_sum","suite":["helm"],"prompt_function":"me_q_sum","hf_repo":"lighteval\/me_q_sum","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"med_dialog:healthcaremagic","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"healthcaremagic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"med_dialog:icliniq","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"icliniq","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"med_mcqa","suite":["helm"],"prompt_function":"med_mcqa","hf_repo":"lighteval\/med_mcqa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"med_paragraph_simplification","suite":["helm"],"prompt_function":"med_paragraph_simplification","hf_repo":"lighteval\/med_paragraph_simplification","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":512,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"med_qa","suite":["helm"],"prompt_function":"med_qa","hf_repo":"bigbio\/med_qa","hf_subset":"med_qa_en_source","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"metaphor_boolean","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_boolean","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"metaphor_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mgsm:en","suite":["lighteval"],"prompt_function":"mgsm_en","hf_repo":"juletxara/mgsm","hf_subset":"en","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false} -{"name":"mgsm:es","suite":["lighteval"],"prompt_function":"mgsm_es","hf_repo":"juletxara/mgsm","hf_subset":"es","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Pregunta:"],"output_regex":null,"frozen":false} 
-{"name":"mgsm:fr","suite":["lighteval"],"prompt_function":"mgsm_fr","hf_repo":"juletxara/mgsm","hf_subset":"fr","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false} -{"name":"mgsm:de","suite":["lighteval"],"prompt_function":"mgsm_de","hf_repo":"juletxara/mgsm","hf_subset":"de","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Frage:"],"output_regex":null,"frozen":false} -{"name":"mgsm:ru","suite":["lighteval"],"prompt_function":"mgsm_ru","hf_repo":"juletxara/mgsm","hf_subset":"ru","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0417\u0430\u0434\u0430\u0447\u0430:"],"output_regex":null,"frozen":false} -{"name":"mgsm:zh","suite":["lighteval"],"prompt_function":"mgsm_zh","hf_repo":"juletxara/mgsm","hf_subset":"zh","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u95ee\u9898:"],"output_regex":null,"frozen":false} -{"name":"mgsm:ja","suite":["lighteval"],"prompt_function":"mgsm_ja","hf_repo":"juletxara/mgsm","hf_subset":"ja","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u554f\u984c:"],"output_regex":null,"frozen":false} 
-{"name":"mgsm:th","suite":["lighteval"],"prompt_function":"mgsm_th","hf_repo":"juletxara/mgsm","hf_subset":"th","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0e42\u0e08\u0e17\u0e22\u0e4c:"],"output_regex":null,"frozen":false} -{"name":"mgsm:sw","suite":["lighteval"],"prompt_function":"mgsm_sw","hf_repo":"juletxara/mgsm","hf_subset":"sw","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Swali:"],"output_regex":null,"frozen":false} -{"name":"mgsm:bn","suite":["lighteval"],"prompt_function":"mgsm_bn","hf_repo":"juletxara/mgsm","hf_subset":"bn","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8:"],"output_regex":null,"frozen":false} -{"name":"mgsm:te","suite":["lighteval"],"prompt_function":"mgsm_te","hf_repo":"juletxara/mgsm","hf_subset":"te","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28:"],"output_regex":null,"frozen":false} -{"name":"minute_mysteries_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"minute_mysteries_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"misconceptions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"misconceptions_russian","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions_russian","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu","suite":["original"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:abstract_algebra","suite":["original","mmlu"],"prompt_function":"mmlu_abstract_algebra","hf_repo":"cais\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:abstract_algebra","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:abstract_algebra","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:anatomy","suite":["original","mmlu"],"prompt_function":"mmlu_anatomy","hf_repo":"cais\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:anatomy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:anatomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:astronomy","suite":["original","mmlu"],"prompt_function":"mmlu_astronomy","hf_repo":"cais\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:astronomy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:astronomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:business_ethics","suite":["original","mmlu"],"prompt_function":"mmlu_business_ethics","hf_repo":"cais\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:business_ethics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:business_ethics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:clinical_knowledge","suite":["original","mmlu"],"prompt_function":"mmlu_clinical_knowledge","hf_repo":"cais\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:clinical_knowledge","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:clinical_knowledge","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_biology","suite":["original","mmlu"],"prompt_function":"mmlu_college_biology","hf_repo":"cais\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:college_biology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_college_chemistry","hf_repo":"cais\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_chemistry","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:college_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_college_computer_science","hf_repo":"cais\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_computer_science","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:college_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_college_mathematics","hf_repo":"cais\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_college_medicine","hf_repo":"cais\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:college_medicine","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_physics","suite":["original","mmlu"],"prompt_function":"mmlu_college_physics","hf_repo":"cais\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:college_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:college_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:computer_security","suite":["original","mmlu"],"prompt_function":"mmlu_computer_security","hf_repo":"cais\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:computer_security","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:computer_security","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:conceptual_physics","suite":["original","mmlu"],"prompt_function":"mmlu_conceptual_physics","hf_repo":"cais\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:conceptual_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:conceptual_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:econometrics","suite":["original","mmlu"],"prompt_function":"mmlu_econometrics","hf_repo":"cais\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:econometrics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:econometrics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:electrical_engineering","suite":["original","mmlu"],"prompt_function":"mmlu_electrical_engineering","hf_repo":"cais\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:electrical_engineering","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:electrical_engineering","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:elementary_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_elementary_mathematics","hf_repo":"cais\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:elementary_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:elementary_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:formal_logic","suite":["original","mmlu"],"prompt_function":"mmlu_formal_logic","hf_repo":"cais\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:formal_logic","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:formal_logic","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:global_facts","suite":["original","mmlu"],"prompt_function":"mmlu_global_facts","hf_repo":"cais\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:global_facts","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:global_facts","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_biology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_biology","hf_repo":"cais\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_biology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_chemistry","hf_repo":"cais\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_chemistry","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_computer_science","hf_repo":"cais\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_computer_science","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_european_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_european_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_european_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_european_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_geography","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_geography","hf_repo":"cais\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_geography","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_geography","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_government_and_politics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_government_and_politics","hf_repo":"cais\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_government_and_politics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_government_and_politics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_macroeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_macroeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_macroeconomics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_macroeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_mathematics","hf_repo":"cais\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_microeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_microeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_microeconomics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_microeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_physics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_physics","hf_repo":"cais\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_psychology","hf_repo":"cais\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_psychology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_statistics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_statistics","hf_repo":"cais\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_statistics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_statistics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_us_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_us_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_us_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:high_school_us_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_world_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_world_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_world_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:high_school_world_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:human_aging","suite":["original","mmlu"],"prompt_function":"mmlu_human_aging","hf_repo":"cais\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:human_aging","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:human_aging","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:human_sexuality","suite":["original","mmlu"],"prompt_function":"mmlu_human_sexuality","hf_repo":"cais\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:human_sexuality","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:human_sexuality","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:international_law","suite":["original","mmlu"],"prompt_function":"mmlu_international_law","hf_repo":"cais\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:international_law","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:international_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:jurisprudence","suite":["original","mmlu"],"prompt_function":"mmlu_jurisprudence","hf_repo":"cais\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:jurisprudence","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:jurisprudence","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:logical_fallacies","suite":["original","mmlu"],"prompt_function":"mmlu_logical_fallacies","hf_repo":"cais\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:logical_fallacies","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:logical_fallacies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:machine_learning","suite":["original","mmlu"],"prompt_function":"mmlu_machine_learning","hf_repo":"cais\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:machine_learning","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:machine_learning","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:management","suite":["original","mmlu"],"prompt_function":"mmlu_management","hf_repo":"cais\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:management","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:management","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:marketing","suite":["original","mmlu"],"prompt_function":"mmlu_marketing","hf_repo":"cais\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:marketing","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:marketing","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:medical_genetics","suite":["original","mmlu"],"prompt_function":"mmlu_medical_genetics","hf_repo":"cais\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:medical_genetics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:medical_genetics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:miscellaneous","suite":["original","mmlu"],"prompt_function":"mmlu_miscellaneous","hf_repo":"cais\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:miscellaneous","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:miscellaneous","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:moral_disputes","suite":["original","mmlu"],"prompt_function":"mmlu_moral_disputes","hf_repo":"cais\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:moral_disputes","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:moral_disputes","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:moral_scenarios","suite":["original","mmlu"],"prompt_function":"mmlu_moral_scenarios","hf_repo":"cais\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:moral_scenarios","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:moral_scenarios","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:nutrition","suite":["original","mmlu"],"prompt_function":"mmlu_nutrition","hf_repo":"cais\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:nutrition","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:nutrition","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:philosophy","suite":["original","mmlu"],"prompt_function":"mmlu_philosophy","hf_repo":"cais\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:philosophy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:philosophy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:prehistory","suite":["original","mmlu"],"prompt_function":"mmlu_prehistory","hf_repo":"cais\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:prehistory","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:prehistory","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_accounting","suite":["original","mmlu"],"prompt_function":"mmlu_professional_accounting","hf_repo":"cais\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_accounting","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_accounting","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:professional_law","suite":["original","mmlu"],"prompt_function":"mmlu_professional_law","hf_repo":"cais\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_law","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_professional_medicine","hf_repo":"cais\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:professional_medicine","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_professional_psychology","hf_repo":"cais\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:professional_psychology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:professional_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:public_relations","suite":["original","mmlu"],"prompt_function":"mmlu_public_relations","hf_repo":"cais\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:public_relations","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:public_relations","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:security_studies","suite":["original","mmlu"],"prompt_function":"mmlu_security_studies","hf_repo":"cais\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:security_studies","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:security_studies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:sociology","suite":["original","mmlu"],"prompt_function":"mmlu_sociology","hf_repo":"cais\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:sociology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:sociology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:us_foreign_policy","suite":["original","mmlu"],"prompt_function":"mmlu_us_foreign_policy","hf_repo":"cais\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:us_foreign_policy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:us_foreign_policy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:virology","suite":["original","mmlu"],"prompt_function":"mmlu_virology","hf_repo":"cais\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:virology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:virology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mmlu:world_religions","suite":["original","mmlu"],"prompt_function":"mmlu_world_religions","hf_repo":"cais\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:world_religions","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mmlu:world_religions","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mnist_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mnist_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"modified_arithmetic","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"modified_arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"moral_permissibility","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"moral_permissibility","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"movie_dialog_same_or_different","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"movie_recommendation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_recommendation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mtnt2019:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mtnt2019:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mtnt2019:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mtnt2019:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mult_data_wrangling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mult_data_wrangling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"multiemo","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"multiemo","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"mutual","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"mutual_plus","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual_plus","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"narrativeqa","suite":["helm","helm_general"],"prompt_function":"narrativeqa","hf_repo":"lighteval/narrative_qa_helm","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"natural_instructions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"natural_instructions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"navigate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"navigate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"nonsense_words_grammar","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"nonsense_words_grammar","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"novel_concepts","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"novel_concepts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:linear_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:linear_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:parabola_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"numeracy:parabola_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:paraboloid_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:paraboloid_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:plane_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"numeracy:plane_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"object_counting","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"object_counting","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"odd_one_out","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"odd_one_out","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"openbookqa","suite":["helm","commonsense_scenario","helm_general"],"prompt_function":"openbookqa_helm","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"openbookqa","suite":["lighteval"],"prompt_function":"openbookqa","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"operators","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"operators","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":"([-+]?\\d+)[.]{0,1}$"} -{"name":"paragraph_segmentation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"paragraph_segmentation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"parsinlu_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"parsinlu_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"parsinlu_reading_comprehension","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+"} 
-{"name":"penguins_in_a_table","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"penguins_in_a_table","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"periodic_elements","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"periodic_elements","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"persian_idioms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"persian_idioms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"phrase_relatedness","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"phrase_relatedness","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"physical_intuition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physical_intuition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"physics","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"physics_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"piqa","suite":["lighteval"],"prompt_function":"piqa_harness","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"piqa","suite":["helm","commonsense_scenario"],"prompt_function":"piqa_helm","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"play_dialog_same_or_different","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"polish_sequence_labeling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"polish_sequence_labeling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"presuppositions_as_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"presuppositions_as_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"prost","suite":["lighteval"],"prompt_function":"prost","hf_repo":"corypaik\/prost","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"pubmedqa","suite":["lighteval"],"prompt_function":"pubmed_qa","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"pubmedqa","suite":["helm"],"prompt_function":"pubmed_qa_helm","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"qa4mre:2011","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2011.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"qa4mre:2012","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2012.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"qa4mre:2013","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2013.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"qa_wikidata","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"qa_wikidata","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleurt","bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"qasper","suite":["lighteval"],"prompt_function":"qasper","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["f1_score_quasi"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"qasper_ll","suite":["lighteval"],"prompt_function":"qasper_ll","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"quac","suite":["helm"],"prompt_function":"quac","hf_repo":"lighteval/quac_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match", "quasi_exact_match", "f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"question_selection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"question_selection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"race:high","suite":["lighteval","race"],"prompt_function":"race","hf_repo":"EleutherAI/race","hf_subset":"high","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"raft:ade_corpus_v2","suite":["helm","helm_general"],"prompt_function":"raft_ade_corpus_v2","hf_repo":"ought\/raft","hf_subset":"ade_corpus_v2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:banking_77","suite":["helm","helm_general"],"prompt_function":"raft_banking_77","hf_repo":"ought\/raft","hf_subset":"banking_77","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:neurips_impact_statement_risks","suite":["helm","helm_general"],"prompt_function":"raft_neurips_impact_statement_risks","hf_repo":"ought\/raft","hf_subset":"neurips_impact_statement_risks","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:one_stop_english","suite":["helm","helm_general"],"prompt_function":"raft_one_stop_english","hf_repo":"ought\/raft","hf_subset":"one_stop_english","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"raft:overruling","suite":["helm","helm_general"],"prompt_function":"raft_overruling","hf_repo":"ought\/raft","hf_subset":"overruling","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:semiconductor_org_types","suite":["helm","helm_general"],"prompt_function":"raft_semiconductor_org_types","hf_repo":"ought\/raft","hf_subset":"semiconductor_org_types","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:systematic_review_inclusion","suite":["helm","helm_general"],"prompt_function":"raft_systematic_review_inclusion","hf_repo":"ought\/raft","hf_subset":"systematic_review_inclusion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:tai_safety_research","suite":["helm","helm_general"],"prompt_function":"raft_tai_safety_research","hf_repo":"ought\/raft","hf_subset":"tai_safety_research","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"raft:terms_of_service","suite":["helm","helm_general"],"prompt_function":"raft_terms_of_service","hf_repo":"ought\/raft","hf_subset":"terms_of_service","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:tweet_eval_hate","suite":["helm","helm_general"],"prompt_function":"raft_tweet_eval_hate","hf_repo":"ought\/raft","hf_subset":"tweet_eval_hate","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"raft:twitter_complaints","suite":["helm","helm_general"],"prompt_function":"raft_twitter_complaints","hf_repo":"ought\/raft","hf_subset":"twitter_complaints","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"real_or_fake_text","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"real_or_fake_text","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"real_toxicity_prompts","suite":["helm"],"prompt_function":"real_toxicity_prompts","hf_repo":"allenai\/real-toxicity-prompts","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"reasoning_about_colored_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"repeat_copy_logic","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"repeat_copy_logic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"rephrase","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rephrase","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"rhyming","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rhyming","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"riddle_sense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"riddle_sense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"ruin_names","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ruin_names","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"salient_translation_error_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"salient_translation_error_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"scientific_press_release","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"scientific_press_release","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"sciq","suite":["lighteval"],"prompt_function":"sciq","hf_repo":"sciq","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"semantic_parsing_in_context_sparc","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_in_context_sparc","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"semantic_parsing_spider","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_spider","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"sentence_ambiguity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sentence_ambiguity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"similarities_abstraction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"similarities_abstraction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simp_turing_concept","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simp_turing_concept","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simple_arithmetic_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simple_arithmetic_json_multiple_choice","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_multiple_choice","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"simple_arithmetic_json_subtasks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_subtasks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simple_arithmetic_multiple_targets_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_multiple_targets_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simple_ethical_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_ethical_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"simple_text_editing","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_text_editing","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"siqa","suite":["helm","commonsense_scenario"],"prompt_function":"siqa","hf_repo":"social_i_qa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"snarks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"snarks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"social_iqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_iqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"social_support","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_support","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score_macro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"sports_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sports_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"storycloze:2016","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2016","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"storycloze:2018","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2018","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"strange_stories","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strange_stories","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"strategyqa","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strategyqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"sufficient_information","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sufficient_information","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"suicide_risk","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"suicide_risk","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"summarization:cnn-dm","suite":["helm","helm_general"],"prompt_function":"cnn_dm","hf_repo":"lighteval\/summarization","hf_subset":"cnn-dm","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"summarization:xsum","suite":["helm","helm_general"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"summarization:xsum-sampled","suite":["helm"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum-sampled","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"super_glue:boolq","suite":["lighteval","superglue"],"prompt_function":"boolq_harness","hf_repo":"super_glue","hf_subset":"boolq","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"super_glue:cb","suite":["lighteval","superglue"],"prompt_function":"cb","hf_repo":"super_glue","hf_subset":"cb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", "multi_f1_numeric"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"super_glue:copa","suite":["lighteval","superglue"],"prompt_function":"copa","hf_repo":"super_glue","hf_subset":"copa","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"super_glue:rte","suite":["lighteval","superglue"],"prompt_function":"rte","hf_repo":"super_glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"super_glue:multirc","suite":["lighteval","superglue"],"prompt_function":"multirc","hf_repo":"super_glue","hf_subset":"multirc","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"super_glue:wic","suite":["lighteval","superglue"],"prompt_function":"wic","hf_repo":"super_glue","hf_subset":"wic","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"super_glue:wsc","suite":["lighteval","superglue"],"prompt_function":"wsc","hf_repo":"super_glue","hf_subset":"wsc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"swahili_english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swahili_english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"swag","suite":["lighteval"],"prompt_function":"swag","hf_repo":"swag","hf_subset":"regular","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"swedish_to_german_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swedish_to_german_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"symbol_interpretation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"symbol_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"synthetic_reasoning:induction","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"induction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"synthetic_reasoning:natural_easy","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"easy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"synthetic_reasoning:natural_hard","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"hard","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"synthetic_reasoning:pattern_match","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"pattern_match","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"synthetic_reasoning:variable_substitution","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"variable_substitution","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"tellmewhy","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tellmewhy","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"temporal_sequences","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"temporal_sequences","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"tense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:arxiv","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_arxiv","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:arxiv","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"arxiv","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:bibliotik","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"bibliotik","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:bookcorpus2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_bookcorpus2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:books3","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_books3","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:commoncrawl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"commoncrawl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:dm-mathematics","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_dm-mathematics","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:dm-mathematics","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"dm-mathematics","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:enron","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_enron","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:enron","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"enron","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:europarl","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_europarl","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:europarl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"europarl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:freelaw","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_freelaw","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:freelaw","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"freelaw","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:github","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_github","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:github","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"github","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:gutenberg","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_gutenberg","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:gutenberg","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"gutenberg","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:hackernews","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_hackernews","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:hackernews","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"hackernews","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:nih-exporter","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_nih-exporter","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:nih-exporter","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"nih-exporter","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:opensubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_opensubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:opensubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"opensubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:openwebtext2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_openwebtext2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:openwebtext2","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"openwebtext2","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:philpapers","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_philpapers","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:pile-cc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pile-cc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:pubmed-abstracts","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-abstracts","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:pubmed-abstracts","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-abstracts","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:pubmed-central","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-central","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:pubmed-central","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-central","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:stackexchange","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_stackexchange","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:stackexchange","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"stackexchange","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:ubuntu-irc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_ubuntu-irc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:uspto","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_upsto","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:upsto","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"uspto","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"the_pile:wikipedia","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_wikipedia","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:wikipedia","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"wikipedia","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:youtubesubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_youtubesubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"the_pile:youtubesubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"youtubesubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"timedial","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"timedial","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"toxigen","suite":["lighteval"],"prompt_function":"toxigen","hf_repo":"skg/toxigen-data","hf_subset":"annotated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"topical_chat","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"topical_chat","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"tracking_shuffled_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tracking_shuffled_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"triviaqa","suite":["lighteval"],"prompt_function":"triviaqa","hf_repo":"trivia_qa","hf_subset":"rc.nocontext","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["quasi_exact_match_triviaqa"],"stop_sequence":["\n", ".", ","],"output_regex":null,"frozen":false} -{"name":"truthfulqa:gen","suite":["lighteval"],"prompt_function":"truthful_qa_generative","hf_repo":"truthful_qa","hf_subset":"generation","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"truthfulqa:mc","suite":["lighteval"],"prompt_function":"truthful_qa_multiple_choice","hf_repo":"truthful_qa","hf_subset":"multiple_choice","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["truthfulqa_mc_metrics"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"truthfulqa","suite":["helm","helm_general"],"prompt_function":"truthful_qa_helm","hf_repo":"lighteval\/truthfulqa_helm","hf_subset":"default","hf_avail_splits":["train","valid"],"evaluation_splits":["valid"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"twitterAAE:aa","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"aa","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"twitterAAE:white","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"white","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"understanding_fables","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"understanding_fables","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"undo_permutation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"undo_permutation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unit_conversion","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_conversion","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unit_interpretation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unnatural_in_context_learning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unnatural_in_context_learning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unscramble:anagrams1","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_1_anagrams"],"evaluation_splits":["mid_word_1_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"unscramble:anagrams2","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_2_anagrams"],"evaluation_splits":["mid_word_2_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unscramble:cycle_letters","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["cycle_letters_in_word"],"evaluation_splits":["cycle_letters_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unscramble:random_insertion","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["random_insertion_in_word"],"evaluation_splits":["random_insertion_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"unscramble:reversed_words","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["reversed_words"],"evaluation_splits":["reversed_words"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"vitaminc_fact_verification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"webqs","suite":["lighteval"],"prompt_function":"webqs","hf_repo":"web_questions","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"what_is_the_tao","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"what_is_the_tao","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"which_wiki_edit","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"which_wiki_edit","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:applies_to_jurisdiction","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"applies_to_jurisdiction","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:atomic_number","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"atomic_number","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:author","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"author","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:award_received","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"award_received","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:basic_form_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"basic_form_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:capital","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:capital_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:central_bank","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"central_bank","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:composer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"composer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:continent","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"continent","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:country","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:country_of_citizenship","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_citizenship","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:country_of_origin","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_origin","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:creator","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"creator","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:currency","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"currency","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:defendant","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"defendant","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:developer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"developer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:diplomatic_relation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"diplomatic_relation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:director","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"director","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:discoverer_or_inventor","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"discoverer_or_inventor","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:drug_or_therapy_used_for_treatment","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"drug_or_therapy_used_for_treatment","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:educated_at","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"educated_at","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:electron_configuration","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"electron_configuration","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:employer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"employer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:field_of_work","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"field_of_work","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:file_extension","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"file_extension","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:genetic_association","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genetic_association","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:genre","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genre","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:has_part","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"has_part","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:headquarters_location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"headquarters_location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:industry","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"industry","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:influenced_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"influenced_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:instance_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instance_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:instrument","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instrument","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:language_of_work_or_name","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"language_of_work_or_name","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:languages_spoken_written_or_signed","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"languages_spoken_written_or_signed","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:laws_applied","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"laws_applied","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:located_in_the_administrative_territorial_entity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"located_in_the_administrative_territorial_entity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:location_of_discovery","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_discovery","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:location_of_formation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_formation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:majority_opinion_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"majority_opinion_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:manufacturer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"manufacturer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:measured_physical_quantity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"measured_physical_quantity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:medical_condition_treated","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"medical_condition_treated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:member_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:member_of_political_party","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_political_party","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:member_of_sports_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_sports_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:movement","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"movement","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:named_after","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"named_after","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:native_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"native_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:number_of_processor_cores","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"number_of_processor_cores","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:occupation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"occupation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:office_held_by_head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:office_held_by_head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:official_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"official_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:operating_system","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"operating_system","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:original_language_of_film_or_TV_show","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"original_language_of_film_or_TV_show","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:original_network","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"original_network","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:overrules","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"overrules","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:owned_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"owned_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:part_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"part_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:participating_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"participating_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:place_of_birth","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"place_of_birth","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:place_of_death","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"place_of_death","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:plaintiff","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"plaintiff","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:position_held","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"position_held","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:position_played_on_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"position_played_on_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:programming_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"programming_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:recommended_unit_of_measurement","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"recommended_unit_of_measurement","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:record_label","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"record_label","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:religion","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"religion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:repealed_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"repealed_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:shares_border_with","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"shares_border_with","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:solved_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"solved_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:statement_describes","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"statement_describes","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:stock_exchange","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"stock_exchange","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:subclass_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"subclass_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:subsidiary","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"subsidiary","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:symptoms_and_signs","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"symptoms_and_signs","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:therapeutic_area","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"therapeutic_area","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:time_of_discovery_or_invention","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"time_of_discovery_or_invention","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikifact:twinned_administrative_body","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"twinned_administrative_body","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wikifact:work_location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"work_location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikitext:2","suite":["lighteval"],"prompt_function":"wikitext","hf_repo":"wikitext","hf_subset":"wikitext-2-raw-v1","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikitext:103:document_level","suite":["harness"],"prompt_function":"wikitext_harness","hf_repo":"EleutherAI\/wikitext_document_level","hf_subset":"wikitext-103-raw-v1","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wikitext:103:document_level","suite":["helm"],"prompt_function":"wikitext_helm","hf_repo":"EleutherAI\/wikitext_document_level","hf_subset":"wikitext-103-raw-v1","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wino_x_german","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"wino_x_german","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"winogrande","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"winogrande","hf_subset":"winogrande_xl","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"winowhy","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"winowhy","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt08:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:en-hu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-hu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt08:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt08:hu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_hu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt09:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:en-hu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-hu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt09:en-it","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-it","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:hu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_hu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt09:it-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_it-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt10:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt10:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt10:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt11:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt11:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt11:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt12:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt12:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt13:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt13:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt13:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt14:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:en-fr","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt14","hf_subset":"fr-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt14:en-hi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-hi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:fr-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt14","hf_subset":"fr-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:hi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_hi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt14:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:cs-en","suite":["helm"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"cs-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:de-en","suite":["helm"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"de-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:fr-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"fr-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt14:hi-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"hi-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt14:ru-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"ru-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt15:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt15:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt15:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:de-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt16","hf_subset":"de-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt16:en-de","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt16","hf_subset":"de-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-ro","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt16","hf_subset":"ro-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-ro","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-ro","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt16:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:ro-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt16","hf_subset":"ro-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:ro-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_ro-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt16:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt16:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt17:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:en-lv","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-lv","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt17:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:lv-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_lv-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt17:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt17:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt18:en-et","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-et","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt18:et-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_et-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt18:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt19:cs-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_cs-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:de-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:de-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt19:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-gu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-gu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-kk","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-kk","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-lt","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-lt","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt19:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:fr-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_fr-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:gu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_gu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt19:kk-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_kk-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:lt-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_lt-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt19:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt20:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:de-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_de-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-iu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-iu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt20:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-km","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-km","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-pl","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-pl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-ps","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ps","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt20:en-ta","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ta","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:fr-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_fr-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:iu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_iu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt20:km-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_km-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:pl-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_pl-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:ps-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ps-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wmt20:ta-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ta-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"wmt20:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"word_sorting","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"word_sorting","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"word_unscrambling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"word_unscrambling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"wsc273","suite":["lighteval"],"prompt_function":"wsc273","hf_repo":"winograd_wsc","hf_subset":"wsc273","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:en","suite":["lighteval"],"prompt_function":"xcopa_en","hf_repo":"xcopa","hf_subset":"default","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"xcopa:et","suite":["lighteval"],"prompt_function":"xcopa_et","hf_repo":"xcopa","hf_subset":"et","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:ht","suite":["lighteval"],"prompt_function":"xcopa_ht","hf_repo":"xcopa","hf_subset":"ht","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:it","suite":["lighteval"],"prompt_function":"xcopa_it","hf_repo":"xcopa","hf_subset":"it","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:id","suite":["lighteval"],"prompt_function":"xcopa_id","hf_repo":"xcopa","hf_subset":"id","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:qu","suite":["lighteval"],"prompt_function":"xcopa_qu","hf_repo":"xcopa","hf_subset":"qu","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"xcopa:sw","suite":["lighteval"],"prompt_function":"xcopa_sw","hf_repo":"xcopa","hf_subset":"sw","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:zh","suite":["lighteval"],"prompt_function":"xcopa_zh","hf_repo":"xcopa","hf_subset":"zh","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:ta","suite":["lighteval"],"prompt_function":"xcopa_ta","hf_repo":"xcopa","hf_subset":"ta","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:th","suite":["lighteval"],"prompt_function":"xcopa_th","hf_repo":"xcopa","hf_subset":"th","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xcopa:tr","suite":["lighteval"],"prompt_function":"xcopa_tr","hf_repo":"xcopa","hf_subset":"tr","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"xcopa:vi","suite":["lighteval"],"prompt_function":"xcopa_vi","hf_repo":"xcopa","hf_subset":"vi","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:en","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"en","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:ru","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"ru","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:zh","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"zh","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:es","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"es","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:ar","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"ar","hf_avail_splits":["training", 
"eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:hi","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"hi","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:id","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"id","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:te","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"te","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:sw","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"sw","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xstory_cloze:eu","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"eu","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"xstory_cloze:my","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"my","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xwinograd:en","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xwinograd:fr","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xwinograd:jp","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"jp","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xwinograd:pt","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"pt","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} 
-{"name":"xwinograd:ru","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} -{"name":"xwinograd:zh","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false} +{"name":"abstract_narrative_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"abstract_narrative_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"anachronisms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"anachronisms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"analogical_similarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analogical_similarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"analytic_entailment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analytic_entailment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"anli","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","train_r2","dev_r2","train_r3","dev_r3","test_r1","test_r2","test_r3"],"evaluation_splits":["test_r1","test_r2","test_r3"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"anli:r1","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","test_r1"],"evaluation_splits":["test_r1"],"few_shots_split":"train_r1","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"anli:r2","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r2","dev_r2","test_r2"],"evaluation_splits":["test_r2"],"few_shots_split":"train_r2","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"anli:r3","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r3","dev_r3","test_r3"],"evaluation_splits":["test_r3"],"few_shots_split":"train_r3","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arc:c:letters","suite":["original","arc"],"prompt_function":"arc_with_options_letters_predict","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arc:c:options","suite":["original","arc"],"prompt_function":"arc_with_options","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arc:c:simple","suite":["original","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arc:challenge","suite":["lighteval","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc", 
"loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arc:easy","suite":["lighteval","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Easy","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:1dc","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_1dc","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:2da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:2dm","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2dm","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"arithmetic:2ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:3da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:3ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:4da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"arithmetic:4ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:5da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic:5ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"arithmetic_bb","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"ascii_word_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ascii_word_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"asdiv","suite":["lighteval"],"prompt_function":"asdiv","hf_repo":"EleutherAI\/asdiv","hf_subset":"asdiv","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"authorship_verification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"authorship_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"auto_categorization","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"auto_categorization","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"auto_debugging","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"auto_debugging","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true} +{"name":"babi_qa","suite":["helm"],"prompt_function":"babi_qa","hf_repo":"facebook\/babi_qa","hf_subset":"en-valid-qa1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Age","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Age","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bbq:Disability_status","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Disability_status","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Gender_identity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Gender_identity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Nationality","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Nationality","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Physical_appearance","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Physical_appearance","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bbq:Race_ethnicity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_ethnicity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Race_x_SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Race_x_gender","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Religion","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Religion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bbq:SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq:Sexual_orientation","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Sexual_orientation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bbq_lite_json","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"bbq_lite_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:auto_debugging","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"auto_debugging","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:age_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:age_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:disability_status_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:disability_status_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:gender_identity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:gender_identity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:nationality_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:nationality_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:physical_appearance_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:physical_appearance_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:race_ethnicity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:race_ethnicity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:religion_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:religion_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:ses_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:bbq_lite_json:ses_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:sexual_orientation_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:bbq_lite_json:sexual_orientation_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:code_line_description","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"code_line_description","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conceptual_combinations:contradictions","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-contradictions","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conceptual_combinations:emergent_properties","suite":["helm"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-emergent_properties","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:conceptual_combinations:fanciful_fictional_combinations","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-fanciful_fictional_combinations","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conceptual_combinations:homonyms","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-homonyms","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conceptual_combinations:invented_words","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-invented_words","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:conlang_translation:adna_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:adna_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:atikampe_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:atikampe_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:conlang_translation:gornam_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:gornam_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:holuan_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:holuan_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:conlang_translation:mkafala_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:mkafala_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:postpositive_english_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:postpositive_english_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:conlang_translation:unapuri_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:unapuri_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:vaomi_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:conlang_translation:vaomi_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:emoji_movie","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"emoji_movie","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:formal_fallacies_syllogisms_negation","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:hindu_knowledge","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"hindu_knowledge","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:known_unknowns","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"known_unknowns","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"bigbench:language_identification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"language_identification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:linguistics_puzzles","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"linguistics_puzzles","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:logic_grid_puzzle","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logic_grid_puzzle","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:logical_deduction-five_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-five_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"bigbench:logical_deduction-seven_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-seven_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:logical_deduction-three_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-three_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:misconceptions_russian","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"misconceptions_russian","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:novel_concepts","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"novel_concepts","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:operators","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"operators","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:parsinlu_reading_comprehension","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:play_dialog_same_or_different","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:repeat_copy_logic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"repeat_copy_logic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:strange_stories-boolean","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-boolean","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:strange_stories-multiple_choice","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-multiple_choice","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:strategyqa","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strategyqa","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:symbol_interpretation-adversarial","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-adversarial","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:symbol_interpretation-emoji_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-emoji_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:symbol_interpretation-name_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-name_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:symbol_interpretation-plain","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-plain","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:symbol_interpretation-tricky","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-tricky","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bigbench:vitaminc_fact_verification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bigbench:winowhy","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"winowhy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:adjunct_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:adjunct_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:anaphor_gender_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:anaphor_gender_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:anaphor_number_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:anaphor_number_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:animate_subject_passive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:animate_subject_passive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:animate_subject_trans","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:animate_subject_trans","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:causative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:causative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:complex_NP_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:complex_NP_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:determiner_noun_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:distractor_agreement_relational_noun","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:distractor_agreement_relational_noun","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:distractor_agreement_relative_clause","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:distractor_agreement_relative_clause","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:drop_argument","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:drop_argument","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:ellipsis_n_bar_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": 
true} +{"name":"blimp:ellipsis_n_bar_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:ellipsis_n_bar_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:ellipsis_n_bar_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:existential_there_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:existential_there_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_subject_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:existential_there_subject_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:expletive_it_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:expletive_it_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:inchoative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:inchoative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:intransitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:intransitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:irregular_past_participle_adjectives","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_past_participle_adjectives","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_past_participle_verbs","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_past_participle_verbs","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:left_branch_island_echo_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:left_branch_island_echo_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:left_branch_island_simple_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:left_branch_island_simple_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:matrix_question_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:matrix_question_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:npi_present_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:npi_present_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:npi_present_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:npi_present_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:only_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:only_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:only_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:only_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:passive_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:passive_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:passive_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:passive_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_c_command","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:principle_A_c_command","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_case_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_case_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_case_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_case_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:principle_A_domain_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_domain_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_domain_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_domain_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_domain_3","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:principle_A_domain_3","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_reconstruction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:principle_A_reconstruction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:sentential_negation_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:sentential_negation_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:sentential_negation_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:sentential_negation_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:sentential_subject_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:sentential_subject_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:superlative_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:superlative_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:superlative_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:superlative_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:tough_vs_raising_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:tough_vs_raising_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:tough_vs_raising_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:tough_vs_raising_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:transitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:transitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_questions_object_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:wh_questions_object_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_questions_subject_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_questions_subject_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_no_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_no_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_with_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_with_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bold","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bold:gender","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bold:political_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"political_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bold:profession","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"profession","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bold:race","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"race","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"bold:religious_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"religious_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"boolq","suite":["helm","helm_general"],"prompt_function":"boolq_helm","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"boolq:contrastset","suite":["helm"],"prompt_function":"boolq_helm_contrastset","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"bridging_anaphora_resolution_barqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"bridging_anaphora_resolution_barqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"causal_judgment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"causal_judgment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"cause_and_effect","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cause_and_effect","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"checkmate_in_one","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"checkmate_in_one","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"chess_state_tracking","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chess_state_tracking","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"chinese_remainder_theorem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chinese_remainder_theorem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"cifar10_classification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cifar10_classification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments","suite":["helm","helm_general"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"civil_comments:LGBTQ","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"LGBTQ","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:black","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"black","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:christian","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"christian","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:female","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"female","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"civil_comments:male","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"male","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:muslim","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"muslim","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:other_religions","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"other_religions","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"civil_comments:white","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"white","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"code_line_description","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"code_line_description","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"codenames","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"codenames","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"color","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"color","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"common_morpheme","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"common_morpheme","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"commonsenseqa","suite":["helm","commonsense_scenario"],"prompt_function":"commonsense_qa","hf_repo":"commonsense_qa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"conceptual_combinations","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conceptual_combinations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"conlang_translation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conlang_translation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge_t5","bleu","perfect_exact_match"],"stop_sequence":[".",";","!","?"],"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true} +{"name":"contextual_parametric_knowledge_conflicts","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"contextual_parametric_knowledge_conflicts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:oh_the_places","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"oh_the_places","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:pilot","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"pilot","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"copyright:popular_books-prefix_length_10","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_10","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:popular_books-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:popular_books-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:popular_books-prefix_length_250","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_250","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"copyright:popular_books-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:popular_books-prefix_length_50","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_50","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:prompt_num_line_1-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_1-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"copyright:prompt_num_line_10-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_10-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"copyright:prompt_num_line_5-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_5-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"coqa","suite":["lighteval"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"coqa_bb","suite":["lighteval","bigbench_programmatic","bigbench"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"covid_dialogue","suite":["helm"],"prompt_function":"covid_dialogue","hf_repo":"lighteval\/covid_dialogue","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"crash_blossom","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crash_blossom","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"crass_ai","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crass_ai","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"cryobiology_spanish","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryobiology_spanish","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"cryptonite","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryptonite","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"cs_algorithms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cs_algorithms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"dark_humor_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dark_humor_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"date_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"date_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"disambiguation_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disambiguation_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"discourse_marker_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"discourse_marker_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"disfl_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disfl_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"drop","suite":["lighteval"],"prompt_function":"drop","hf_repo":"lighteval/drop_harness","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":"train","few_shots_select":"random_sampling_from_train","generation_size":null,"metric":["drop"],"stop_sequence":["."],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"dyck_language:2","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"dyck_language:3","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"3","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"dyck_language:4","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"4","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"dyck_languages","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dyck_languages","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"elementary_math_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"elementary_math_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"emoji_movie","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"emoji_movie","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"emojis_emotion_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"emojis_emotion_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"empirical_judgments","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"empirical_judgments","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"english_russian_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_russian_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entailed_polarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"entailed_polarity_hindi","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity_hindi","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_data_imputation:Buy","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Buy","hf_subset":"default","hf_avail_splits":["train","test","valid"],"evaluation_splits":["valid","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_data_imputation:Restaurant","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Restaurant","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Abt_Buy","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Abt_Buy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"entity_matching:Amazon_Google","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Amazon_Google","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Beer","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Beer","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Company","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Company","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"entity_matching:DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Dirty_DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Dirty_DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Dirty_Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"entity_matching:Dirty_iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Fodors_Zagats","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Fodors_Zagats","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"entity_matching:iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": 
true} +{"name":"epistemic_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"epistemic_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"ethics:commonsense","suite":["lighteval","ethics"],"prompt_function":"ethics_commonsense","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"commonsense","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"ethics:deontology","suite":["lighteval","ethics"],"prompt_function":"ethics_deontology","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"deontology","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"ethics:justice","suite":["lighteval","ethics"],"prompt_function":"ethics_justice","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"justice","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"ethics:utilitarianism","suite":["lighteval","ethics"],"prompt_function":"ethics_utilitarianism","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"utilitarianism","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"ethics:virtue","suite":["lighteval","ethics"],"prompt_function":"ethics_virtue","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"virtue","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"evaluating_information_essentiality","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"evaluating_information_essentiality","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"fact_checker","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fact_checker","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"fantasy_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fantasy_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"few_shot_nlg","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"few_shot_nlg","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"figure_of_speech_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"figure_of_speech_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"formal_fallacies_syllogisms_negation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"gem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"gender_inclusive_sentences_german","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gender_inclusive_sentences_german","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"general_knowledge","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"general_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"geometric_shapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"geometric_shapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:cola","suite":["lighteval","glue"],"prompt_function":"cola","hf_repo":"glue","hf_subset":"cola","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", 
"mcc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:mnli","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_matched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:mnli_mismatched","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_mismatched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:mrpc","suite":["lighteval","glue"],"prompt_function":"mrpc","hf_repo":"glue","hf_subset":"mrpc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:qnli","suite":["lighteval","glue"],"prompt_function":"qnli","hf_repo":"glue","hf_subset":"qnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:qqp","suite":["lighteval","glue"],"prompt_function":"qqp","hf_repo":"glue","hf_subset":"qqp","hf_avail_splits":["train","validation","test"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", 
"loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:rte","suite":["lighteval","glue"],"prompt_function":"rte","hf_repo":"glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:sst2","suite":["lighteval","glue"],"prompt_function":"sst","hf_repo":"glue","hf_subset":"sst2","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:stsb","suite":["lighteval","glue"],"prompt_function":"stsb","hf_repo":"glue","hf_subset":"stsb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"glue:wnli","suite":["lighteval","glue"],"prompt_function":"wnli","hf_repo":"glue","hf_subset":"wnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"goal_step_wikihow","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"goal_step_wikihow","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"gpqa","suite":["lighteval"],"prompt_function":"gpqa","hf_repo":"Idavidrein/gpqa","hf_subset":"gpqa_main","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"gre_reading_comprehension","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gre_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"gsm8k","suite":["lighteval"],"prompt_function":"gsm8k","hf_repo":"gsm8k","hf_subset":"main","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":256,"metric":["quasi_exact_match_gsm8k"],"stop_sequence":[":","Question:", "Question"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"headqa:en","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"headqa:es","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"es","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hellaswag","suite":["lighteval"],"prompt_function":"hellaswag_harness","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hellaswag","suite":["helm","helm_general"],"prompt_function":"hellaswag_helm","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"hhh_alignment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hhh_alignment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hindi_question_answering","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hindi_question_answering","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hindu_knowledge","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"hindu_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hinglish_toxicity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hinglish_toxicity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"human_organs_senses","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"human_organs_senses","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"humaneval","suite":["helm","code_scenario"],"prompt_function":"humaneval","hf_repo":"openai_humaneval","hf_subset":"openai_humaneval","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":600,"metric":["code_humaneval"],"stop_sequence":["\nclass","\ndef","\nif","\nprint"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"hyperbaton","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hyperbaton","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"identify_math_theorems","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_math_theorems","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"identify_odd_metaphor","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_odd_metaphor","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"imdb","suite":["helm","helm_general"],"prompt_function":"imdb","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"imdb:contrastset","suite":["helm"],"prompt_function":"imdb_contrastset","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"implicatures","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicatures","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"implicit_relations","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicit_relations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"intent_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intent_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"interactive_qa_mmlu:abstract_algebra","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_abstract_algebra","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"interactive_qa_mmlu:college_chemistry","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_college_chemistry","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"interactive_qa_mmlu:global_facts","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_global_facts","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"interactive_qa_mmlu:miscellaneous","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_miscellaneous","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"interactive_qa_mmlu:nutrition","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_nutrition","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"interactive_qa_mmlu:us_foreign_policy","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_us_foreign_policy","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"international_phonetic_alphabet_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"international_phonetic_alphabet_transliterate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_transliterate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"intersect_geometry","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intersect_geometry","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"irony_identification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"irony_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"iwslt17:ar-en","suite":["lighteval","harness_selection"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:en-ar","suite":["lighteval","harness_selection"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"iwslt17:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:en-ko","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ko","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"iwslt17:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:ko-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ko-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"iwslt17:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"kanji_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kanji_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"kannada","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kannada","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"key_value_maps","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"key_value_maps","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"known_unknowns","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"known_unknowns","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:standard","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lambada:standard_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai:de","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai:en","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai:es","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lambada:openai:fr","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai:it","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"it","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lambada:openai_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"language_games","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_games","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"language_identification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"legal_summarization:billsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"BillSum","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1024,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"legal_summarization:eurlexsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"EurLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"legal_summarization:multilexsum","suite":["helm"],"prompt_function":"multilexsum","hf_repo":"lighteval\/legal_summarization","hf_subset":"MultiLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":256,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"legalsupport","suite":["helm"],"prompt_function":"legal_support","hf_repo":"lighteval\/LegalSupport","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lexglue:case_hold","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_case_hold","hf_repo":"lighteval\/lexglue","hf_subset":"case_hold","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lexglue:ecthr_a","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_a","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_a","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lexglue:ecthr_b","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_b","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_b","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lexglue:eurlex","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_eurlex","hf_repo":"lighteval\/lexglue","hf_subset":"eurlex","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lexglue:ledgar","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ledgar","hf_repo":"lighteval\/lexglue","hf_subset":"ledgar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lexglue:scotus","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_scotus","hf_repo":"lighteval\/lexglue","hf_subset":"scotus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lexglue:unfair_tos","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_unfair_tos","hf_repo":"lighteval\/lexglue","hf_subset":"unfair_tos","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:brazilian_court_decisions_judgment","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_judgment","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_judgment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"lextreme:brazilian_court_decisions_unanimity","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_unanimity","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_unanimity","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:covid19_emergency_event","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_covid19_emergency_event","hf_repo":"lighteval\/lextreme","hf_subset":"covid19_emergency_event","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:german_argument_mining","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_german_argument_mining","hf_repo":"lighteval\/lextreme","hf_subset":"german_argument_mining","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lextreme:greek_legal_code_chapter","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_chapter","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_chapter","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:greek_legal_code_subject","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_subject","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_subject","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:greek_legal_code_volume","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_volume","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_volume","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lextreme:greek_legal_ner","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_ner","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_ner","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":430,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:legalnero","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_legalnero","hf_repo":"lighteval\/lextreme","hf_subset":"legalnero","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":788,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:lener_br","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_lener_br","hf_repo":"lighteval\/lextreme","hf_subset":"lener_br","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":338,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:mapa_coarse","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_coarse","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_coarse","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lextreme:mapa_fine","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_fine","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_fine","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:multi_eurlex_level_1","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_1","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:multi_eurlex_level_2","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_2","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_2","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lextreme:multi_eurlex_level_3","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_3","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_3","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:online_terms_of_service_clause_topics","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_clause_topics","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_clause_topics","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lextreme:online_terms_of_service_unfairness_levels","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_unfairness_levels","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_unfairness_levels","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lextreme:swiss_judgment_prediction","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_swiss_judgment_prediction","hf_repo":"lighteval\/lextreme","hf_subset":"swiss_judgment_prediction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"linguistic_mappings","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"linguistic_mappings","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"linguistics_puzzles","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"linguistics_puzzles","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true} +{"name":"logic_grid_puzzle","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logic_grid_puzzle","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"logical_args","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_args","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"logical_deduction","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"logical_deduction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"logical_fallacy_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_fallacy_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"logical_sequence","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_sequence","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"logiqa","suite":["lighteval"],"prompt_function":"logiqa","hf_repo":"lighteval/logiqa_harness","hf_subset":"logiqa","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lsat_qa","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"all","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lsat_qa:assignment","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"assignment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lsat_qa:grouping","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"grouping","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"lsat_qa:miscellaneous","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"miscellaneous","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"lsat_qa:ordering","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"ordering","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"mathematical_induction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mathematical_induction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mathqa","suite":["lighteval"],"prompt_function":"mathqa","hf_repo":"math_qa","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"matrixshapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"matrixshapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"me_q_sum","suite":["helm"],"prompt_function":"me_q_sum","hf_repo":"lighteval\/me_q_sum","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"med_dialog:healthcaremagic","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"healthcaremagic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"med_dialog:icliniq","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"icliniq","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"med_mcqa","suite":["helm"],"prompt_function":"med_mcqa","hf_repo":"lighteval\/med_mcqa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"med_paragraph_simplification","suite":["helm"],"prompt_function":"med_paragraph_simplification","hf_repo":"lighteval\/med_paragraph_simplification","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":512,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"med_qa","suite":["helm"],"prompt_function":"med_qa","hf_repo":"bigbio\/med_qa","hf_subset":"med_qa_en_source","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"metaphor_boolean","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_boolean","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"metaphor_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:en","suite":["lighteval"],"prompt_function":"mgsm_en","hf_repo":"juletxara/mgsm","hf_subset":"en","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:es","suite":["lighteval"],"prompt_function":"mgsm_es","hf_repo":"juletxara/mgsm","hf_subset":"es","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", 
"quasi_exact_match"],"stop_sequence":["\n", ":", "Pregunta:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:fr","suite":["lighteval"],"prompt_function":"mgsm_fr","hf_repo":"juletxara/mgsm","hf_subset":"fr","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:de","suite":["lighteval"],"prompt_function":"mgsm_de","hf_repo":"juletxara/mgsm","hf_subset":"de","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Frage:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:ru","suite":["lighteval"],"prompt_function":"mgsm_ru","hf_repo":"juletxara/mgsm","hf_subset":"ru","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0417\u0430\u0434\u0430\u0447\u0430:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:zh","suite":["lighteval"],"prompt_function":"mgsm_zh","hf_repo":"juletxara/mgsm","hf_subset":"zh","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u95ee\u9898:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:ja","suite":["lighteval"],"prompt_function":"mgsm_ja","hf_repo":"juletxara/mgsm","hf_subset":"ja","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", 
"quasi_exact_match"],"stop_sequence":["\n", ":", "\u554f\u984c:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:th","suite":["lighteval"],"prompt_function":"mgsm_th","hf_repo":"juletxara/mgsm","hf_subset":"th","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0e42\u0e08\u0e17\u0e22\u0e4c:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:sw","suite":["lighteval"],"prompt_function":"mgsm_sw","hf_repo":"juletxara/mgsm","hf_subset":"sw","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Swali:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:bn","suite":["lighteval"],"prompt_function":"mgsm_bn","hf_repo":"juletxara/mgsm","hf_subset":"bn","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8:"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mgsm:te","suite":["lighteval"],"prompt_function":"mgsm_te","hf_repo":"juletxara/mgsm","hf_subset":"te","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28:"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"minute_mysteries_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"minute_mysteries_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"misconceptions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"misconceptions_russian","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions_russian","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu","suite":["original"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:abstract_algebra","suite":["original","mmlu"],"prompt_function":"mmlu_abstract_algebra","hf_repo":"cais\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:abstract_algebra","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:abstract_algebra","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:anatomy","suite":["original","mmlu"],"prompt_function":"mmlu_anatomy","hf_repo":"cais\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:anatomy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:anatomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:astronomy","suite":["original","mmlu"],"prompt_function":"mmlu_astronomy","hf_repo":"cais\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:astronomy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:astronomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:business_ethics","suite":["original","mmlu"],"prompt_function":"mmlu_business_ethics","hf_repo":"cais\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:business_ethics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:business_ethics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:clinical_knowledge","suite":["original","mmlu"],"prompt_function":"mmlu_clinical_knowledge","hf_repo":"cais\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:clinical_knowledge","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:clinical_knowledge","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:college_biology","suite":["original","mmlu"],"prompt_function":"mmlu_college_biology","hf_repo":"cais\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_biology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_college_chemistry","hf_repo":"cais\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:college_chemistry","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_college_computer_science","hf_repo":"cais\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_computer_science","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:college_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_college_mathematics","hf_repo":"cais\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:college_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_college_medicine","hf_repo":"cais\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_medicine","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_physics","suite":["original","mmlu"],"prompt_function":"mmlu_college_physics","hf_repo":"cais\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:college_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:college_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:computer_security","suite":["original","mmlu"],"prompt_function":"mmlu_computer_security","hf_repo":"cais\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:computer_security","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:computer_security","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:conceptual_physics","suite":["original","mmlu"],"prompt_function":"mmlu_conceptual_physics","hf_repo":"cais\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:conceptual_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:conceptual_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:econometrics","suite":["original","mmlu"],"prompt_function":"mmlu_econometrics","hf_repo":"cais\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:econometrics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:econometrics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:electrical_engineering","suite":["original","mmlu"],"prompt_function":"mmlu_electrical_engineering","hf_repo":"cais\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:electrical_engineering","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:electrical_engineering","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:elementary_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_elementary_mathematics","hf_repo":"cais\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:elementary_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:elementary_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:formal_logic","suite":["original","mmlu"],"prompt_function":"mmlu_formal_logic","hf_repo":"cais\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:formal_logic","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:formal_logic","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:global_facts","suite":["original","mmlu"],"prompt_function":"mmlu_global_facts","hf_repo":"cais\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:global_facts","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:global_facts","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_biology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_biology","hf_repo":"cais\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_biology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_chemistry","hf_repo":"cais\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_chemistry","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_computer_science","hf_repo":"cais\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_computer_science","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_european_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_european_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_european_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_european_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_geography","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_geography","hf_repo":"cais\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_geography","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_geography","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_government_and_politics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_government_and_politics","hf_repo":"cais\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_government_and_politics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_government_and_politics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_macroeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_macroeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_macroeconomics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_macroeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_mathematics","hf_repo":"cais\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_mathematics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_microeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_microeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_microeconomics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_microeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_physics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_physics","hf_repo":"cais\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_physics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_psychology","hf_repo":"cais\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_psychology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_statistics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_statistics","hf_repo":"cais\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_statistics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_statistics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_us_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_us_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_us_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_us_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_world_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_world_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:high_school_world_history","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:high_school_world_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:human_aging","suite":["original","mmlu"],"prompt_function":"mmlu_human_aging","hf_repo":"cais\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:human_aging","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:human_aging","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:human_sexuality","suite":["original","mmlu"],"prompt_function":"mmlu_human_sexuality","hf_repo":"cais\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:human_sexuality","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:human_sexuality","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:international_law","suite":["original","mmlu"],"prompt_function":"mmlu_international_law","hf_repo":"cais\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:international_law","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:international_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:jurisprudence","suite":["original","mmlu"],"prompt_function":"mmlu_jurisprudence","hf_repo":"cais\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:jurisprudence","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:jurisprudence","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:logical_fallacies","suite":["original","mmlu"],"prompt_function":"mmlu_logical_fallacies","hf_repo":"cais\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:logical_fallacies","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:logical_fallacies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:machine_learning","suite":["original","mmlu"],"prompt_function":"mmlu_machine_learning","hf_repo":"cais\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:machine_learning","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:machine_learning","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:management","suite":["original","mmlu"],"prompt_function":"mmlu_management","hf_repo":"cais\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:management","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:management","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:marketing","suite":["original","mmlu"],"prompt_function":"mmlu_marketing","hf_repo":"cais\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:marketing","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:marketing","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:medical_genetics","suite":["original","mmlu"],"prompt_function":"mmlu_medical_genetics","hf_repo":"cais\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:medical_genetics","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:medical_genetics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:miscellaneous","suite":["original","mmlu"],"prompt_function":"mmlu_miscellaneous","hf_repo":"cais\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:miscellaneous","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:miscellaneous","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:moral_disputes","suite":["original","mmlu"],"prompt_function":"mmlu_moral_disputes","hf_repo":"cais\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:moral_disputes","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:moral_disputes","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:moral_scenarios","suite":["original","mmlu"],"prompt_function":"mmlu_moral_scenarios","hf_repo":"cais\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:moral_scenarios","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:moral_scenarios","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:nutrition","suite":["original","mmlu"],"prompt_function":"mmlu_nutrition","hf_repo":"cais\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:nutrition","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:nutrition","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:philosophy","suite":["original","mmlu"],"prompt_function":"mmlu_philosophy","hf_repo":"cais\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:philosophy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:philosophy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:prehistory","suite":["original","mmlu"],"prompt_function":"mmlu_prehistory","hf_repo":"cais\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:prehistory","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:prehistory","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_accounting","suite":["original","mmlu"],"prompt_function":"mmlu_professional_accounting","hf_repo":"cais\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_accounting","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:professional_accounting","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_law","suite":["original","mmlu"],"prompt_function":"mmlu_professional_law","hf_repo":"cais\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_law","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:professional_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_professional_medicine","hf_repo":"cais\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_medicine","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_professional_psychology","hf_repo":"cais\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:professional_psychology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:professional_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:public_relations","suite":["original","mmlu"],"prompt_function":"mmlu_public_relations","hf_repo":"cais\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:public_relations","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:public_relations","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:security_studies","suite":["original","mmlu"],"prompt_function":"mmlu_security_studies","hf_repo":"cais\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:security_studies","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:security_studies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:sociology","suite":["original","mmlu"],"prompt_function":"mmlu_sociology","hf_repo":"cais\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:sociology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:sociology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:us_foreign_policy","suite":["original","mmlu"],"prompt_function":"mmlu_us_foreign_policy","hf_repo":"cais\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:us_foreign_policy","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:us_foreign_policy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:virology","suite":["original","mmlu"],"prompt_function":"mmlu_virology","hf_repo":"cais\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:virology","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mmlu:virology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:world_religions","suite":["original","mmlu"],"prompt_function":"mmlu_world_religions","hf_repo":"cais\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:world_religions","suite":["lighteval","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mmlu:world_religions","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mnist_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mnist_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"modified_arithmetic","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"modified_arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"moral_permissibility","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"moral_permissibility","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"movie_dialog_same_or_different","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"movie_recommendation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_recommendation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mtnt2019:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mtnt2019:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mtnt2019:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mtnt2019:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mult_data_wrangling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mult_data_wrangling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"multiemo","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"multiemo","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"mutual","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"mutual_plus","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual_plus","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"narrativeqa","suite":["helm","helm_general"],"prompt_function":"narrativeqa","hf_repo":"lighteval/narrative_qa_helm","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"natural_instructions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"natural_instructions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"navigate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"navigate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"nonsense_words_grammar","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"nonsense_words_grammar","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"novel_concepts","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"novel_concepts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:linear_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:linear_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"numeracy:parabola_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:parabola_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:paraboloid_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:paraboloid_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"numeracy:plane_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"numeracy:plane_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"object_counting","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"object_counting","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"odd_one_out","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"odd_one_out","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"openbookqa","suite":["helm","commonsense_scenario","helm_general"],"prompt_function":"openbookqa_helm","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"openbookqa","suite":["lighteval"],"prompt_function":"openbookqa","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"operators","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"operators","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":"([-+]?\\d+)[.]{0,1}$", "trust_dataset": true} +{"name":"paragraph_segmentation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"paragraph_segmentation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"parsinlu_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"parsinlu_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"parsinlu_reading_comprehension","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true} +{"name":"penguins_in_a_table","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"penguins_in_a_table","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"periodic_elements","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"periodic_elements","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"persian_idioms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"persian_idioms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"phrase_relatedness","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"phrase_relatedness","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"physical_intuition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physical_intuition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"physics","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"physics_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"piqa","suite":["lighteval"],"prompt_function":"piqa_harness","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"piqa","suite":["helm","commonsense_scenario"],"prompt_function":"piqa_helm","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"play_dialog_same_or_different","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"polish_sequence_labeling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"polish_sequence_labeling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"presuppositions_as_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"presuppositions_as_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"prost","suite":["lighteval"],"prompt_function":"prost","hf_repo":"corypaik\/prost","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"pubmedqa","suite":["lighteval"],"prompt_function":"pubmed_qa","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"pubmedqa","suite":["helm"],"prompt_function":"pubmed_qa_helm","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"qa4mre:2011","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2011.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"qa4mre:2012","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2012.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"qa4mre:2013","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2013.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"qa_wikidata","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"qa_wikidata","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleurt","bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"qasper","suite":["lighteval"],"prompt_function":"qasper","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["f1_score_quasi"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"qasper_ll","suite":["lighteval"],"prompt_function":"qasper_ll","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"quac","suite":["helm"],"prompt_function":"quac","hf_repo":"lighteval/quac_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match", "quasi_exact_match", "f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"question_selection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"question_selection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"race:high","suite":["lighteval","race"],"prompt_function":"race","hf_repo":"EleutherAI/race","hf_subset":"high","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"raft:ade_corpus_v2","suite":["helm","helm_general"],"prompt_function":"raft_ade_corpus_v2","hf_repo":"ought\/raft","hf_subset":"ade_corpus_v2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:banking_77","suite":["helm","helm_general"],"prompt_function":"raft_banking_77","hf_repo":"ought\/raft","hf_subset":"banking_77","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:neurips_impact_statement_risks","suite":["helm","helm_general"],"prompt_function":"raft_neurips_impact_statement_risks","hf_repo":"ought\/raft","hf_subset":"neurips_impact_statement_risks","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"raft:one_stop_english","suite":["helm","helm_general"],"prompt_function":"raft_one_stop_english","hf_repo":"ought\/raft","hf_subset":"one_stop_english","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:overruling","suite":["helm","helm_general"],"prompt_function":"raft_overruling","hf_repo":"ought\/raft","hf_subset":"overruling","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:semiconductor_org_types","suite":["helm","helm_general"],"prompt_function":"raft_semiconductor_org_types","hf_repo":"ought\/raft","hf_subset":"semiconductor_org_types","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"raft:systematic_review_inclusion","suite":["helm","helm_general"],"prompt_function":"raft_systematic_review_inclusion","hf_repo":"ought\/raft","hf_subset":"systematic_review_inclusion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:tai_safety_research","suite":["helm","helm_general"],"prompt_function":"raft_tai_safety_research","hf_repo":"ought\/raft","hf_subset":"tai_safety_research","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:terms_of_service","suite":["helm","helm_general"],"prompt_function":"raft_terms_of_service","hf_repo":"ought\/raft","hf_subset":"terms_of_service","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"raft:tweet_eval_hate","suite":["helm","helm_general"],"prompt_function":"raft_tweet_eval_hate","hf_repo":"ought\/raft","hf_subset":"tweet_eval_hate","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"raft:twitter_complaints","suite":["helm","helm_general"],"prompt_function":"raft_twitter_complaints","hf_repo":"ought\/raft","hf_subset":"twitter_complaints","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"real_or_fake_text","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"real_or_fake_text","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"real_toxicity_prompts","suite":["helm"],"prompt_function":"real_toxicity_prompts","hf_repo":"allenai\/real-toxicity-prompts","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"reasoning_about_colored_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"repeat_copy_logic","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"repeat_copy_logic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"rephrase","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rephrase","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"rhyming","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rhyming","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"riddle_sense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"riddle_sense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"ruin_names","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ruin_names","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"salient_translation_error_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"salient_translation_error_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"scientific_press_release","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"scientific_press_release","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"sciq","suite":["lighteval"],"prompt_function":"sciq","hf_repo":"sciq","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"semantic_parsing_in_context_sparc","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_in_context_sparc","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"semantic_parsing_spider","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_spider","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"sentence_ambiguity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sentence_ambiguity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"similarities_abstraction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"similarities_abstraction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simp_turing_concept","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simp_turing_concept","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simple_arithmetic_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simple_arithmetic_json_multiple_choice","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_multiple_choice","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"simple_arithmetic_json_subtasks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_subtasks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simple_arithmetic_multiple_targets_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_multiple_targets_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simple_ethical_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_ethical_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"simple_text_editing","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_text_editing","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"siqa","suite":["helm","commonsense_scenario"],"prompt_function":"siqa","hf_repo":"social_i_qa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"snarks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"snarks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"social_iqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_iqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"social_support","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_support","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score_macro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"sports_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sports_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"storycloze:2016","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2016","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"storycloze:2018","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2018","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"strange_stories","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strange_stories","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"strategyqa","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strategyqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"sufficient_information","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sufficient_information","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"suicide_risk","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"suicide_risk","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"summarization:cnn-dm","suite":["helm","helm_general"],"prompt_function":"cnn_dm","hf_repo":"lighteval\/summarization","hf_subset":"cnn-dm","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"summarization:xsum","suite":["helm","helm_general"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"summarization:xsum-sampled","suite":["helm"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum-sampled","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:boolq","suite":["lighteval","superglue"],"prompt_function":"boolq_harness","hf_repo":"super_glue","hf_subset":"boolq","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:cb","suite":["lighteval","superglue"],"prompt_function":"cb","hf_repo":"super_glue","hf_subset":"cb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", "multi_f1_numeric"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"super_glue:copa","suite":["lighteval","superglue"],"prompt_function":"copa","hf_repo":"super_glue","hf_subset":"copa","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:rte","suite":["lighteval","superglue"],"prompt_function":"rte","hf_repo":"super_glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:multirc","suite":["lighteval","superglue"],"prompt_function":"multirc","hf_repo":"super_glue","hf_subset":"multirc","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:wic","suite":["lighteval","superglue"],"prompt_function":"wic","hf_repo":"super_glue","hf_subset":"wic","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"super_glue:wsc","suite":["lighteval","superglue"],"prompt_function":"wsc","hf_repo":"super_glue","hf_subset":"wsc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"swahili_english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swahili_english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"swag","suite":["lighteval"],"prompt_function":"swag","hf_repo":"swag","hf_subset":"regular","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"swedish_to_german_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swedish_to_german_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"symbol_interpretation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"symbol_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"synthetic_reasoning:induction","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"induction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"synthetic_reasoning:natural_easy","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"easy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"synthetic_reasoning:natural_hard","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"hard","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"synthetic_reasoning:pattern_match","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"pattern_match","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"synthetic_reasoning:variable_substitution","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"variable_substitution","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"tellmewhy","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tellmewhy","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"temporal_sequences","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"temporal_sequences","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"tense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:arxiv","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_arxiv","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:arxiv","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"arxiv","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:bibliotik","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"bibliotik","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:bookcorpus2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_bookcorpus2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:books3","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_books3","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:commoncrawl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"commoncrawl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:dm-mathematics","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_dm-mathematics","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:dm-mathematics","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"dm-mathematics","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:enron","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_enron","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:enron","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"enron","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:europarl","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_europarl","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:europarl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"europarl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:freelaw","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_freelaw","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:freelaw","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"freelaw","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:github","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_github","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:github","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"github","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:gutenberg","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_gutenberg","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:gutenberg","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"gutenberg","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:hackernews","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_hackernews","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:hackernews","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"hackernews","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:nih-exporter","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_nih-exporter","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:nih-exporter","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"nih-exporter","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:opensubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_opensubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:opensubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"opensubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:openwebtext2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_openwebtext2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:openwebtext2","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"openwebtext2","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:philpapers","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_philpapers","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:pile-cc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pile-cc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:pubmed-abstracts","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-abstracts","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:pubmed-abstracts","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-abstracts","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:pubmed-central","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-central","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:pubmed-central","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-central","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:stackexchange","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_stackexchange","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:stackexchange","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"stackexchange","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:ubuntu-irc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_ubuntu-irc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:uspto","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_upsto","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:upsto","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"uspto","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:wikipedia","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_wikipedia","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:wikipedia","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"wikipedia","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"the_pile:youtubesubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_youtubesubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"the_pile:youtubesubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"youtubesubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"timedial","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"timedial","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"toxigen","suite":["lighteval"],"prompt_function":"toxigen","hf_repo":"skg/toxigen-data","hf_subset":"annotated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"topical_chat","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"topical_chat","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"tracking_shuffled_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tracking_shuffled_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"triviaqa","suite":["lighteval"],"prompt_function":"triviaqa","hf_repo":"trivia_qa","hf_subset":"rc.nocontext","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["quasi_exact_match_triviaqa"],"stop_sequence":["\n", ".", ","],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"truthfulqa:gen","suite":["lighteval"],"prompt_function":"truthful_qa_generative","hf_repo":"truthful_qa","hf_subset":"generation","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"truthfulqa:mc","suite":["lighteval"],"prompt_function":"truthful_qa_multiple_choice","hf_repo":"truthful_qa","hf_subset":"multiple_choice","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["truthfulqa_mc_metrics"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"truthfulqa","suite":["helm","helm_general"],"prompt_function":"truthful_qa_helm","hf_repo":"lighteval\/truthfulqa_helm","hf_subset":"default","hf_avail_splits":["train","valid"],"evaluation_splits":["valid"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"twitterAAE:aa","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"aa","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"twitterAAE:white","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"white","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"understanding_fables","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"understanding_fables","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"undo_permutation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"undo_permutation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unit_conversion","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_conversion","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unit_interpretation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unnatural_in_context_learning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unnatural_in_context_learning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"unscramble:anagrams1","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_1_anagrams"],"evaluation_splits":["mid_word_1_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unscramble:anagrams2","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_2_anagrams"],"evaluation_splits":["mid_word_2_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unscramble:cycle_letters","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["cycle_letters_in_word"],"evaluation_splits":["cycle_letters_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"unscramble:random_insertion","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["random_insertion_in_word"],"evaluation_splits":["random_insertion_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"unscramble:reversed_words","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["reversed_words"],"evaluation_splits":["reversed_words"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"vitaminc_fact_verification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"webqs","suite":["lighteval"],"prompt_function":"webqs","hf_repo":"web_questions","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"what_is_the_tao","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"what_is_the_tao","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"which_wiki_edit","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"which_wiki_edit","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:applies_to_jurisdiction","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"applies_to_jurisdiction","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:atomic_number","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"atomic_number","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:author","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"author","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:award_received","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"award_received","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:basic_form_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"basic_form_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:capital","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:capital_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:central_bank","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"central_bank","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:composer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"composer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:continent","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"continent","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:country","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:country_of_citizenship","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_citizenship","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:country_of_origin","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_origin","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:creator","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"creator","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:currency","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"currency","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:defendant","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"defendant","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:developer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"developer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:diplomatic_relation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"diplomatic_relation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:director","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"director","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:discoverer_or_inventor","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"discoverer_or_inventor","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:drug_or_therapy_used_for_treatment","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"drug_or_therapy_used_for_treatment","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:educated_at","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"educated_at","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:electron_configuration","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"electron_configuration","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:employer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"employer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:field_of_work","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"field_of_work","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:file_extension","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"file_extension","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:genetic_association","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genetic_association","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:genre","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genre","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:has_part","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"has_part","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:headquarters_location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"headquarters_location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:industry","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"industry","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:influenced_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"influenced_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:instance_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instance_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:instrument","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instrument","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:language_of_work_or_name","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"language_of_work_or_name","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:languages_spoken_written_or_signed","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"languages_spoken_written_or_signed","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:laws_applied","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"laws_applied","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:located_in_the_administrative_territorial_entity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"located_in_the_administrative_territorial_entity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:location_of_discovery","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_discovery","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:location_of_formation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_formation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:majority_opinion_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"majority_opinion_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:manufacturer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"manufacturer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:measured_physical_quantity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"measured_physical_quantity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:medical_condition_treated","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"medical_condition_treated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:member_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:member_of_political_party","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_political_party","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:member_of_sports_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_sports_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:movement","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"movement","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:named_after","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"named_after","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:native_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"native_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:number_of_processor_cores","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"number_of_processor_cores","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:occupation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"occupation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:office_held_by_head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:office_held_by_head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:official_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"official_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:operating_system","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"operating_system","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:original_language_of_film_or_TV_show","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"original_language_of_film_or_TV_show","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:original_network","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"original_network","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:overrules","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"overrules","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:owned_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"owned_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:part_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"part_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:participating_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"participating_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:place_of_birth","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"place_of_birth","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:place_of_death","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"place_of_death","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:plaintiff","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"plaintiff","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:position_held","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"position_held","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:position_played_on_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"position_played_on_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:programming_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"programming_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:recommended_unit_of_measurement","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"recommended_unit_of_measurement","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:record_label","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"record_label","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:religion","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"religion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:repealed_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"repealed_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:shares_border_with","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"shares_border_with","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:solved_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"solved_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:statement_describes","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"statement_describes","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:stock_exchange","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"stock_exchange","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:subclass_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"subclass_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:subsidiary","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"subsidiary","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:symptoms_and_signs","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"symptoms_and_signs","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:therapeutic_area","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"therapeutic_area","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikifact:time_of_discovery_or_invention","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"time_of_discovery_or_invention","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:twinned_administrative_body","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"twinned_administrative_body","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikifact:work_location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"work_location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikitext:2","suite":["lighteval"],"prompt_function":"wikitext","hf_repo":"wikitext","hf_subset":"wikitext-2-raw-v1","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wikitext:103:document_level","suite":["harness"],"prompt_function":"wikitext_harness","hf_repo":"EleutherAI\/wikitext_document_level","hf_subset":"wikitext-103-raw-v1","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wikitext:103:document_level","suite":["helm"],"prompt_function":"wikitext_helm","hf_repo":"EleutherAI\/wikitext_document_level","hf_subset":"wikitext-103-raw-v1","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wino_x_german","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"wino_x_german","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"winogrande","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"winogrande","hf_subset":"winogrande_xl","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"winowhy","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"winowhy","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt08:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:en-hu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_en-hu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt08:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt08:hu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt08_hu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt09:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:en-hu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-hu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt09:en-it","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_en-it","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt09:hu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_hu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt09:it-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt09_it-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt10:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt10:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt10_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt11:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt11:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt11:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt11_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt12:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt12:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt12:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt12_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt13:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:en-es","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt13:es-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_es-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt13:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt13_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt14:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:en-fr","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt14","hf_subset":"fr-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt14:en-hi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-hi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:fr-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt14","hf_subset":"fr-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt14:hi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_hi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt14_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:cs-en","suite":["helm"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"cs-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:de-en","suite":["helm"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"de-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:fr-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"fr-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt14:hi-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"hi-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt14:ru-en","suite":["helm"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/wmt14","hf_subset":"ru-en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt15:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt15:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt15:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt15_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:de-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt16","hf_subset":"de-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt16:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-de","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt16","hf_subset":"de-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt16:en-ro","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_alphabetical","hf_repo":"wmt16","hf_subset":"ro-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-ro","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-ro","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt16:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:ro-en","suite":["lighteval","gpt3_benchmarks"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"wmt16","hf_subset":"ro-en","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:ro-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_ro-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt16:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt16:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt16_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt17:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-lv","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-lv","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt17:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:lv-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_lv-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt17:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt17:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt17_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt18:en-et","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-et","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-tr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-tr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt18:et-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_et-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt18:tr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_tr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt18:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt18_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:cs-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_cs-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:de-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:de-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_de-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt19:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-fi","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-fi","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-gu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-gu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt19:en-kk","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-kk","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-lt","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-lt","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:fi-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_fi-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": 
true} +{"name":"wmt19:fr-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_fr-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:gu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_gu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:kk-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_kk-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:lt-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_lt-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt19:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt19:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt19_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:cs-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_cs-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:de-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_de-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, 
"trust_dataset": true} +{"name":"wmt20:en-cs","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-cs","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-iu","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-iu","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt20:en-km","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-km","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-pl","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-pl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-ps","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ps","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-ru","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:en-ta","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-ta","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt20:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:fr-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_fr-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:iu-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_iu-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt20:km-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_km-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:pl-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_pl-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:ps-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ps-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:ru-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ru-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"wmt20:ta-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_ta-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wmt20:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"wmt20_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"word_sorting","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"word_sorting","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"word_unscrambling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"word_unscrambling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"wsc273","suite":["lighteval"],"prompt_function":"wsc273","hf_repo":"winograd_wsc","hf_subset":"wsc273","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xcopa:en","suite":["lighteval"],"prompt_function":"xcopa_en","hf_repo":"xcopa","hf_subset":"default","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:et","suite":["lighteval"],"prompt_function":"xcopa_et","hf_repo":"xcopa","hf_subset":"et","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:ht","suite":["lighteval"],"prompt_function":"xcopa_ht","hf_repo":"xcopa","hf_subset":"ht","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:it","suite":["lighteval"],"prompt_function":"xcopa_it","hf_repo":"xcopa","hf_subset":"it","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:id","suite":["lighteval"],"prompt_function":"xcopa_id","hf_repo":"xcopa","hf_subset":"id","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xcopa:qu","suite":["lighteval"],"prompt_function":"xcopa_qu","hf_repo":"xcopa","hf_subset":"qu","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:sw","suite":["lighteval"],"prompt_function":"xcopa_sw","hf_repo":"xcopa","hf_subset":"sw","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:zh","suite":["lighteval"],"prompt_function":"xcopa_zh","hf_repo":"xcopa","hf_subset":"zh","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:ta","suite":["lighteval"],"prompt_function":"xcopa_ta","hf_repo":"xcopa","hf_subset":"ta","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:th","suite":["lighteval"],"prompt_function":"xcopa_th","hf_repo":"xcopa","hf_subset":"th","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xcopa:tr","suite":["lighteval"],"prompt_function":"xcopa_tr","hf_repo":"xcopa","hf_subset":"tr","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xcopa:vi","suite":["lighteval"],"prompt_function":"xcopa_vi","hf_repo":"xcopa","hf_subset":"vi","hf_avail_splits":["test","train","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:en","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"en","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:ru","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"ru","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:zh","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"zh","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xstory_cloze:es","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"es","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:ar","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"ar","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:hi","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"hi","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:id","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"id","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:te","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"te","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xstory_cloze:sw","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"sw","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:eu","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"eu","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xstory_cloze:my","suite":["lighteval"],"prompt_function":"storycloze","hf_repo":"juletxara/xstory_cloze","hf_subset":"my","hf_avail_splits":["training", "eval"],"evaluation_splits":["eval"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xwinograd:en","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xwinograd:fr","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} 
+{"name":"xwinograd:jp","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"jp","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xwinograd:pt","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"pt","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xwinograd:ru","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"ru","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"xwinograd:zh","suite":["lighteval"],"prompt_function":"winogrande","hf_repo":"Muennighoff/xwinograd","hf_subset":"zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} diff --git a/tasks_examples/custom_tasks/custom_evaluation_tasks.py b/tasks_examples/custom_tasks/custom_evaluation_tasks.py index a7a7a5af0..d6fed44d1 100644 --- a/tasks_examples/custom_tasks/custom_evaluation_tasks.py +++ b/tasks_examples/custom_tasks/custom_evaluation_tasks.py @@ -25,6 +25,7 @@ hf_repo="hellaswag", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -33,6 +34,7 @@ hf_repo="winogrande", hf_subset="winogrande_xl", 
metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -41,6 +43,7 @@ hf_repo="piqa", hf_subset="plain_text", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -50,6 +53,7 @@ hf_subset="default", hf_avail_splits=["train", "validation"], metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -58,6 +62,7 @@ hf_repo="openbookqa", hf_subset="main", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -68,6 +73,7 @@ evaluation_splits=["test"], generation_size=1, metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -78,6 +84,7 @@ evaluation_splits=["test"], generation_size=1, metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -86,6 +93,7 @@ hf_repo="commonsense_qa", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + trust_dataset=True, stop_sequence=["\n"], ), ] @@ -146,6 +154,7 @@ def preprocess(text): hf_subset="rc.nocontext", metric=[Metrics.quasi_exact_match], generation_size=20, + trust_dataset=True, stop_sequence=["\n", ".", ","], ), LightevalTaskConfig( @@ -155,6 +164,7 @@ def preprocess(text): hf_subset="default", metric=[Metrics.quasi_exact_match], generation_size=20, + trust_dataset=True, stop_sequence=["\n", ".", ","], ), ] @@ -184,6 +194,7 @@ def natural_questions_prompt(line, task_name: str = None): hf_repo="super_glue", hf_subset="boolq", metric=["target_perplexity"], + trust_dataset=True, stop_sequence=["\n"], ), LightevalTaskConfig( @@ -193,6 +204,7 @@ def natural_questions_prompt(line, task_name: str = None): hf_subset="deault", 
metric=[Metrics.quasi_exact_match], generation_size=20, + trust_dataset=True, stop_sequence=["\n", ".", ","], ), ] @@ -229,6 +241,7 @@ def __init__( few_shots_select=None, suite=["custom"], generation_size=40, + trust_dataset=True, stop_sequence=None, output_regex=None, frozen=False, @@ -247,6 +260,7 @@ def __init__( generation_size=generation_size, output_regex=output_regex, frozen=frozen, + trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), ) @@ -296,6 +310,7 @@ def __init__( few_shots_select=None, suite=None, generation_size=-1, + trust_dataset=True, stop_sequence=None, output_regex=None, frozen=False, @@ -312,6 +327,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, + trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, @@ -438,6 +454,7 @@ def __init__( few_shots_select=None, suite=None, generation_size=4, + trust_dataset=True, stop_sequence=None, output_regex=None, frozen=False, @@ -454,6 +471,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, + trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, @@ -530,6 +548,7 @@ def __init__( few_shots_select=None, suite=None, generation_size=-1, + trust_dataset=True, stop_sequence=None, output_regex=None, frozen=False, @@ -546,6 +565,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, + trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen,