diff --git a/src/lighteval/tasks/tasks_table.jsonl b/src/lighteval/tasks/tasks_table.jsonl index 00a718acd..21c383fc3 100644 --- a/src/lighteval/tasks/tasks_table.jsonl +++ b/src/lighteval/tasks/tasks_table.jsonl @@ -459,13 +459,13 @@ {"name":"lsat_qa:grouping","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"grouping","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} {"name":"lsat_qa:miscellaneous","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"miscellaneous","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} {"name":"lsat_qa:ordering","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"ordering","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} -{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} +{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} {"name":"mathematical_induction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mathematical_induction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} {"name":"mathqa","suite":["lighteval"],"prompt_function":"mathqa","hf_repo":"math_qa","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true} {"name":"matrixshapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"matrixshapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true}