Skip to content

Commit

Permalink
set a max length for the MATH task, to cap it at 2048 even for models…
Browse files Browse the repository at this point in the history
… with a much longer context size. Should fix #80
  • Loading branch information
clefourrier committed Mar 4, 2024
1 parent b9d0277 commit ff193b9
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/lighteval/tasks/tasks_table.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,13 @@
{"name":"lsat_qa:grouping","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"grouping","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"lsat_qa:miscellaneous","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"miscellaneous","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"lsat_qa:ordering","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"ordering","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"mathematical_induction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mathematical_induction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"mathqa","suite":["lighteval"],"prompt_function":"mathqa","hf_repo":"math_qa","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
{"name":"matrixshapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"matrixshapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false}
Expand Down

0 comments on commit ff193b9

Please sign in to comment.