
Update huggingface-hub for compatibility with datasets 2.18 #84

Merged · 4 commits · Mar 4, 2024
1 change: 1 addition & 0 deletions README.md
@@ -237,6 +237,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t
- `metric` (list), the metrics you want to use for your evaluation (see next section for a detailed explanation)
- `output_regex` (str), a regex used to filter the generation. (Generative metrics only keep the tokens between the first and second match of the regex. For example, with a regex matching `\n` and the generation `\nModel generation output\nSome other text`, the metric is fed only `Model generation output`.)
- `frozen` (bool), for now is set to False, but we will steadily pass all stable tasks to True.
+- `trust_dataset` (bool), set to True to allow the dataset's loading script to run (forwarded to `datasets.load_dataset` as `trust_remote_code`).
Member: (nit) Why not call it `trust_remote_code`, matching the parameter name in `datasets`?

Member Author: To avoid confusing it with the `trust_remote_code` required for models, initially ^^

Member Author: I'm OK with adding it, feel free to open an issue :)


Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`.

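For illustration, a rough sketch of such a task entry expressed as a `LightevalTaskConfig`. This is a hedged example: the field names follow the list above and the diffs below, the repo id and values are placeholders, and the real dataclass may require additional fields (e.g., a prompt function).

```python
from lighteval.tasks.lighteval_task import LightevalTaskConfig

# Hypothetical task entry; "your-org/your-dataset" is a placeholder repo id.
my_task = LightevalTaskConfig(
    name="yournewtask",
    hf_repo="your-org/your-dataset",
    hf_subset="default",
    metric=["loglikelihood_acc"],  # metrics used to score the task
    output_regex=None,             # keep the full generation
    frozen=False,                  # flipped to True once the task is stable
    trust_dataset=True,            # the dataset ships a loading script
)
```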
3 changes: 3 additions & 0 deletions community_tasks/arabic_evals.py
@@ -49,6 +49,7 @@ def __init__(
stop_sequence=None,
output_regex=None,
frozen=False,
+trust_dataset=True,
)


@@ -115,6 +116,7 @@ def __init__(
stop_sequence=None,
output_regex=None,
frozen=False,
+trust_dataset=True,
)


@@ -145,6 +147,7 @@ def acva(line, task_name: str = None):
few_shots_split="validation",
few_shots_select="sequential",
metric=["loglikelihood_acc"],
+trust_dataset=True,
)


2 changes: 1 addition & 1 deletion pyproject.toml
@@ -50,7 +50,7 @@ keywords = ["evaluation", "nlp", "llm"]
dependencies = [
# Base dependencies
"transformers>=4.38.0",
"huggingface_hub==0.20.3",
"huggingface_hub>=0.21.2",
"torch>=2.0",
"GitPython==3.1.31", # for logging
"datasets>=2.14.0",
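A quick sanity check that an environment satisfies the bumped constraint (a minimal sketch; the floor versions come from the diff above and the PR title):

```python
from importlib.metadata import version  # stdlib, Python 3.8+

# datasets 2.18 needs huggingface_hub >= 0.21.2; the old pin (0.20.3) is too low.
for pkg, floor in [("huggingface_hub", "0.21.2"), ("datasets", "2.14.0")]:
    print(f"{pkg}: installed {version(pkg)}, required >= {floor}")
```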
24 changes: 12 additions & 12 deletions src/lighteval/tasks/lighteval_task.py
@@ -5,7 +5,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

-from datasets import load_dataset
+from datasets import DownloadMode, load_dataset

from lighteval.few_shot_manager import FewShotSampler
from lighteval.logging.hierarchical_logger import hlog, hlog_warn
@@ -62,7 +62,7 @@ class LightevalTaskConfig:
truncated_num_docs (bool): Whether fewer than the total number of documents were used
output_regex (str)
frozen (bool)

+trust_dataset (bool): Whether to trust the dataset at execution or not
"""

name: str
@@ -84,6 +84,8 @@ class LightevalTaskConfig:
original_num_docs: int = -1
effective_num_docs: int = -1

+trust_dataset: bool = None

def as_dict(self):
return {
"name": self.name,
@@ -144,6 +146,7 @@ def __init__(self, name: str, cfg: LightevalTaskConfig, cache_dir: Optional[str]
self.dataset_path = self.hf_repo
self.dataset_config_name = self.hf_subset
self.dataset = None # Delayed download
+self.trust_dataset = cfg.trust_dataset
hlog(f"{self.dataset_path} {self.dataset_config_name}")
self._fewshot_docs = None
self._docs = None
@@ -521,14 +524,10 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int =
"""

if dataset_loading_processes <= 1:
-datasets = [
-download_dataset_worker((task.dataset_path, task.dataset_config_name)) for task in tasks
-] # Also help us with gdb
+datasets = [download_dataset_worker(task) for task in tasks] # Also help us with gdb
else:
with Pool(processes=dataset_loading_processes) as pool:
-datasets = pool.map(
-download_dataset_worker, [(task.dataset_path, task.dataset_config_name) for task in tasks]
-)
+datasets = pool.map(download_dataset_worker, tasks)

for task, dataset in zip(tasks, datasets):
task.dataset = dataset
@@ -539,13 +538,14 @@ def download_dataset_worker(args):
Worker function to download a dataset from the HuggingFace Hub.
Used for parallel dataset loading.
"""
-dataset_path, dataset_config_name = args
+task: LightevalTask = args
dataset = load_dataset(
-path=dataset_path,
-name=dataset_config_name,
+path=task.dataset_path,
+name=task.dataset_config_name,
data_dir=None,
cache_dir=None,
-download_mode=None,
+download_mode=DownloadMode.FORCE_REDOWNLOAD, # None
+trust_remote_code=task.trust_dataset,
)
return dataset
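A rough usage sketch of the new calling convention: the worker now receives the whole task object instead of a `(path, config)` tuple, so it can forward `trust_dataset` to `load_dataset`. `_TaskStub` is a hypothetical stand-in exposing only the attributes the worker reads:

```python
from dataclasses import dataclass
from multiprocessing import Pool
from typing import Optional

from lighteval.tasks.lighteval_task import download_dataset_worker

@dataclass
class _TaskStub:
    # The three attributes download_dataset_worker accesses.
    dataset_path: str
    dataset_config_name: Optional[str] = None
    trust_dataset: bool = False

if __name__ == "__main__":
    # Any Hub dataset ids would do; note the worker force-redownloads.
    tasks = [_TaskStub("hellaswag"), _TaskStub("super_glue", "boolq")]
    with Pool(processes=2) as pool:
        datasets = pool.map(download_dataset_worker, tasks)
```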
