Skip to content

Commit

Permalink
Merge pull request #712 from allenai/ot-fix-oe-eval-bpb
Browse files: browse the repository at this point in the history
Fix bug in bpb tasks from oe-eval, add 0-shot csqa and social_iqa
  • Loading branch information
OyvindTafjord authored Aug 27, 2024
2 parents 5560b6d + 136fb46 commit 46f06cb
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added downstream eval task for requests dumped from oe-eval tasks
- Added `CosLinearEnvelope` scheduler, which is a pointwise product of a cosine schedule and a linear decay.
- Added ability to save outputs of submodules for debugging purposes.
- Added a number of tasks from oe-eval to the downstream eval tasks.
- Versioned the dolma flan change in `named_data_mix.py`.

### Changed
Expand Down
21 changes: 18 additions & 3 deletions olmo/eval/downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -1534,9 +1534,14 @@ def prep_examples(self):
continuation_str = request_dict["continuation"]
label_id = request["label"]
cont_id = request["idx"]
if self.metric_type in ["ce_loss", "bpb"] and label_id != cont_id:
# Skip non-target continuations for ce_loss and bpb
continue
if self.metric_type in ["ce_loss", "bpb"]:
if label_id != cont_id:
# Skip non-target continuations for ce_loss and bpb
continue
else:
# Treat as instance with just one continuation
cont_id = 0
label_id = 0
doc_text = request_dict["context"]
ctx = self.token_encode(doc_text)
dc = self.token_encode(self.doc_to_domain_conditional(doc))
Expand Down Expand Up @@ -1746,6 +1751,8 @@ def doc_to_label(self, doc) -> int:
),
"csqa_mc_5shot": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "mc_5shot", "metric_type": "acc"}),
"csqa_mc_5shot_bpb": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "mc_5shot", "metric_type": "bpb"}),
"csqa_rc_0shot": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "rc_0shot", "metric_type": "len_norm"}),
"csqa_rc_0shot_bpb": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "rc_0shot", "metric_type": "bpb"}),
"csqa_rc_5shot": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "rc_5shot", "metric_type": "len_norm"}),
"csqa_rc_5shot_bpb": (OEEvalTask, {"dataset_path": "csqa", "dataset_name": "rc_5shot", "metric_type": "bpb"}),
"hellaswag_mc_5shot": (
Expand Down Expand Up @@ -1812,6 +1819,14 @@ def doc_to_label(self, doc) -> int:
OEEvalTask,
{"dataset_path": "socialiqa", "dataset_name": "mc_5shot", "metric_type": "bpb"},
),
"socialiqa_rc_0shot": (
OEEvalTask,
{"dataset_path": "socialiqa", "dataset_name": "rc_0shot", "metric_type": "len_norm"},
),
"socialiqa_rc_0shot_bpb": (
OEEvalTask,
{"dataset_path": "socialiqa", "dataset_name": "rc_0shot", "metric_type": "bpb"},
),
"socialiqa_rc_5shot": (
OEEvalTask,
{"dataset_path": "socialiqa", "dataset_name": "rc_5shot", "metric_type": "len_norm"},
Expand Down
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/csqa/rc_0shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "csqa", "task_hash": "739e57911dbac4fcdf9e5ff392d1b61e", "task_config": {"dataset_path": "commonsense_qa", "native_id_field": "id", "primary_metric": "acc_per_char", "split": "validation", "random_subsample_seed": 1234, "num_shots": 0, "generation_kwargs": {}, "context_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "fewshot_seed": 1234, "dataset_name": null, "task_name": "csqa", "version": 0, "task_core": "csqa"}, "current_date": "2024-08-23 21:20:34 UTC", "num_instances": 1221}
Binary file not shown.
1 change: 1 addition & 0 deletions olmo_data/oe_eval_tasks/socialiqa/rc_0shot/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"task_name": "socialiqa", "task_hash": "0880e16cf3101aff83d7fee648402df9", "task_config": {"dataset_path": "social_i_qa", "native_id_field": "index", "primary_metric": "acc_per_char", "split": "validation", "random_subsample_seed": 1234, "num_shots": 0, "generation_kwargs": {}, "context_kwargs": {}, "metric_kwargs": {"uncond_docid_offset": 1000000}, "fewshot_seed": 1234, "dataset_name": null, "task_name": "socialiqa", "version": 0, "task_core": "socialiqa"}, "current_date": "2024-08-23 21:20:37 UTC", "num_instances": 1954}
Binary file not shown.

0 comments on commit 46f06cb

Please sign in to comment.