From 9c53f8dfbd2c4b7dfef8ced0a2534a1fba9bc7d6 Mon Sep 17 00:00:00 2001 From: Roy Hvaara Date: Tue, 3 Sep 2024 23:20:04 +0200 Subject: [PATCH] Target v0.1.10 for task BigCodeBench/16 --- tools/fix_020.py | 59 ---------------------------------------------- tools/fix_v0110.py | 8 ++++++- 2 files changed, 7 insertions(+), 60 deletions(-) delete mode 100644 tools/fix_020.py diff --git a/tools/fix_020.py b/tools/fix_020.py deleted file mode 100644 index 82b48d7..0000000 --- a/tools/fix_020.py +++ /dev/null @@ -1,59 +0,0 @@ -from datasets import load_dataset, Dataset, DatasetDict -from huggingface_hub import HfApi - -import json -import copy - -BIGCODEBENCH_HF = "bigcode/bigcodebench" -BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard" -BIGCODEBENCH_VERSION = "v0.1.0_hf" -BIGCODEBENCH_UPDATE = "bigcode/bcb_update" -BIGCODEBENCH_NEW_VERSION = "v0.1.1" # TODO(hvaara): [DO NOT MERGE] Figure out which version we're targeting - - -def map_ds(sample): - - if sample["task_id"] in ["BigCodeBench/16"]: - for k in sample.keys(): - sample[k] = sample[k].replace( - "No logs found to backup.", "No logs found to backup" - ) - - return sample - - -if __name__ == "__main__": - api = HfApi() - ds_dict = load_dataset(BIGCODEBENCH_HF) - hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF) - ds = ds_dict[BIGCODEBENCH_VERSION] - hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION] - function_id = [16] - - new_ds = ds.map(map_ds) - new_ds.to_json("BigCodeBench.jsonl") - ds_dict[BIGCODEBENCH_NEW_VERSION] = new_ds - ds_dict.push_to_hub(BIGCODEBENCH_HF) - - new_hard_ds = hard_ds.map(map_ds) - new_hard_ds.to_json("BigCodeBench-Hard.jsonl") - hard_ds_dict[BIGCODEBENCH_NEW_VERSION] = new_hard_ds - hard_ds_dict.push_to_hub(BIGCODEBENCH_HARD_HF) - - for i in function_id: - old_sample = ds.select([i]) - new_sample = new_ds.select([i]) - old_sample.to_json("old.jsonl") - new_sample.to_json("new.jsonl") - api.upload_file( - path_or_fileobj="old.jsonl", - path_in_repo=f"{i}/old.jsonl", - repo_id=BIGCODEBENCH_UPDATE, - 
# repo_type="dataset" - ) - api.upload_file( - path_or_fileobj="new.jsonl", - path_in_repo=f"{i}/new.jsonl", - repo_id=BIGCODEBENCH_UPDATE, - # repo_type="dataset" - ) diff --git a/tools/fix_v0110.py b/tools/fix_v0110.py index d1d1300..a6aadac 100644 --- a/tools/fix_v0110.py +++ b/tools/fix_v0110.py @@ -11,6 +11,12 @@ BIGCODEBENCH_NEW_VERSION = "v0.1.2" def map_ds(sample): + if sample["task_id"] in ["BigCodeBench/16"]: + for k in sample.keys(): + sample[k] = sample[k].replace( + "No logs found to backup.", "No logs found to backup" + ) + if sample["task_id"] in ["BigCodeBench/37"]: for k in sample.keys(): if "prompt" in k: @@ -28,7 +34,7 @@ def map_ds(sample): hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF) ds = ds_dict[BIGCODEBENCH_VERSION] hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION] - function_id = [37] + function_id = [16, 37] new_ds = ds.map(map_ds) new_ds.to_json("BigCodeBench.jsonl")