Homogenize logging system #150

Merged · 16 commits · Jul 9, 2024
Changes from 11 commits
2 changes: 1 addition & 1 deletion README.md
@@ -42,7 +42,7 @@ Install the dependencies. For the default installation, you just need:
pip install .
```

-If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependencies group that fits your use case (`accelerate`,`tgi`,`optimum`,`quantization`,`adapters`,`nanotron`):
+If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependencies group that fits your use case (`accelerate`,`tgi`,`optimum`,`quantization`,`adapters`,`nanotron`,`tensorboardX`):

```bash
pip install '.[optional1,optional2]'
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -55,7 +55,7 @@ keywords = ["evaluation", "nlp", "llm"]
dependencies = [
# Base dependencies
"transformers>=4.38.0",
"huggingface_hub>=0.22.0",
"huggingface_hub>=0.23.0",
"torch>=2.0",
"GitPython>=3.1.41", # for logging
"datasets>=2.14.0",
@@ -86,6 +86,7 @@ nanotron = [
"nanotron",
"tensorboardX"
]
+tensorboardX = ["tensorboardX"]
quality = ["ruff==v0.2.2","pre-commit"]
tests = ["pytest==7.4.0"]
dev = ["lighteval[accelerate,quality,tests]"]
1 change: 1 addition & 0 deletions run_evals_accelerate.py
@@ -48,6 +48,7 @@ def get_parser():
parser.add_argument("--push_results_to_hub", default=False, action="store_true")
parser.add_argument("--save_details", action="store_true")
parser.add_argument("--push_details_to_hub", default=False, action="store_true")
parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
parser.add_argument(
"--public_run", default=False, action="store_true", help="Push results and details to a public repo"
)
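For reference, the new flag is a plain `store_true` switch. A minimal, self-contained sketch of its behaviour follows; the parser below is illustrative, not the script's full `get_parser()`:

```python
import argparse

# Illustrative stand-alone parser reproducing just the new switch.
parser = argparse.ArgumentParser()
parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")

# Passing the flag flips it to True; omitting it leaves the default False.
assert parser.parse_args(["--push_results_to_tensorboard"]).push_results_to_tensorboard is True
assert parser.parse_args([]).push_results_to_tensorboard is False
```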
230 changes: 106 additions & 124 deletions src/lighteval/logging/evaluation_tracker.py

Large diffs are not rendered by default.
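Since the tracker rewrite itself is collapsed, here is a rough sketch of its new surface, inferred purely from the two call sites further down in this PR. The parameter names come from those call sites; the defaults, type hints, and bodies are assumptions, not the actual implementation:

```python
from typing import Optional

# Inferred from the call sites in main_accelerate.py and main_nanotron.py
# below; defaults and type hints are guesses.
class EvaluationTracker:
    def __init__(
        self,
        output_dir: Optional[str] = None,
        hub_results_org: str = "",
        push_results_to_hub: bool = False,
        push_details_to_hub: bool = False,
        push_results_to_tensorboard: bool = False,
        tensorboard_metric_prefix: str = "eval",
        public: bool = False,
        token: str = "",
        nanotron_run_info=None,  # nanotron run metadata (nanotron_config.general)
    ) -> None:
        ...

    def save(self) -> None:
        # Destination and push behaviour now come from the constructor flags,
        # instead of being passed to save() at each call site.
        ...
```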

16 changes: 1 addition & 15 deletions src/lighteval/logging/info_loggers.py
@@ -37,11 +37,7 @@
from lighteval.models.model_output import ModelReturn
from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
from lighteval.tasks.requests import Doc
-from lighteval.utils import as_list, is_nanotron_available, sanitize_numpy
-
-
-if is_nanotron_available():
-    from nanotron.config import Config
+from lighteval.utils import as_list, sanitize_numpy


@dataclass(init=False)
@@ -86,9 +82,6 @@ class GeneralConfigLogger:
model_dtype: str = None
model_size: str = None

-# Nanotron config
-config: "Config" = None

def __init__(self) -> None:
"""Stores the current lighteval commit for reproducibility, and starts the evaluation timer."""
try:
@@ -105,7 +98,6 @@ def log_args_info(
override_batch_size: Union[None, int],
max_samples: Union[None, int],
job_id: str,
config: "Config" = None,
) -> None:
"""
Logs the information about the arguments passed to the method.
@@ -117,17 +109,11 @@ def log_args_info(
Else, the batch size is automatically inferred depending on what fits in memory.
max_samples (Union[None, int]): maximum number of samples, if None, use all the samples available.
job_id (str): job ID, used to retrieve logs.
-config (optional): Nanotron Config

Returns:
None

"""
self.num_fewshot_seeds = num_fewshot_seeds
self.override_batch_size = override_batch_size
self.max_samples = max_samples
self.job_id = job_id
-self.config = config

def log_model_info(self, model_info: ModelInfo) -> None:
"""
14 changes: 10 additions & 4 deletions src/lighteval/main_accelerate.py
@@ -56,7 +56,15 @@
@htrack()
def main(args):
env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
-evaluation_tracker = EvaluationTracker(hub_results_org=args.results_org, token=TOKEN)
+evaluation_tracker = EvaluationTracker(
+    output_dir=args.output_dir,
+    hub_results_org=args.results_org,
+    push_results_to_hub=args.push_results_to_hub,
+    push_details_to_hub=args.push_details_to_hub,
+    push_results_to_tensorboard=args.push_results_to_tensorboard,
+    public=args.public_run,
+    token=TOKEN,
+)
evaluation_tracker.general_config_logger.log_args_info(
args.num_fewshot_seeds, args.override_batch_size, args.max_samples, args.job_id
)
@@ -124,9 +132,7 @@ def main(args):
evaluation_tracker.details_logger.aggregate()

if args.output_dir:
-evaluation_tracker.save(
-    args.output_dir, args.push_results_to_hub, args.push_details_to_hub, args.public_run
-)
+evaluation_tracker.save()

final_dict = evaluation_tracker.generate_final_dict()

8 changes: 7 additions & 1 deletion src/lighteval/main_nanotron.py
@@ -96,7 +96,13 @@ def main(
data_parallel_size=lighteval_config.parallelism.dp,
)

-evaluation_tracker = EvaluationTracker(token=TOKEN)
+evaluation_tracker = EvaluationTracker(
+    token=TOKEN,
+    output_dir=lighteval_config.logging.local_output_path,
+    hub_results_org=lighteval_config.logging.hub_repo_tensorboard,
+    tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix,
+    nanotron_run_info=nanotron_config.general,
+)
evaluation_tracker.general_config_logger.log_args_info(
num_fewshot_seeds=1,
override_batch_size=None,
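From the attribute accesses above, the `lighteval_config.logging` section consumed here plausibly carries at least the following fields. The dataclass below is an illustrative reconstruction, not nanotron's actual config class:

```python
from dataclasses import dataclass
from typing import Optional

# Illustrative reconstruction of the logging config read above; only the
# fields actually used by this call site are listed.
@dataclass
class LightevalLoggingArgs:
    local_output_path: Optional[str] = None     # -> EvaluationTracker(output_dir=...)
    hub_repo_tensorboard: Optional[str] = None  # -> EvaluationTracker(hub_results_org=...)
    tensorboard_metric_prefix: str = "eval"     # prefix prepended to logged metric names
```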
9 changes: 9 additions & 0 deletions src/lighteval/utils.py
@@ -191,6 +191,15 @@ def is_peft_available() -> bool:
NO_PEFT_ERROR_MSG = "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip."



+def is_tensorboardX_available() -> bool:
+    return importlib.util.find_spec("tensorboardX") is not None
+
+
+NO_TENSORBOARDX_WARN_MSG = (
+    "You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping."
+)

def is_openai_available() -> bool:
return importlib.util.find_spec("openai") is not None

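The new helper follows the same soft-dependency pattern as the existing `is_nanotron_available()` and `is_peft_available()` guards. A plausible consumer inside the collapsed `evaluation_tracker.py` diff would look roughly like this; the exact call site, and the use of lighteval's `hlog_warn`, are assumptions:

```python
from lighteval.logging.hierarchical_logger import hlog_warn
from lighteval.utils import NO_TENSORBOARDX_WARN_MSG, is_tensorboardX_available

if is_tensorboardX_available():
    # Only import tensorboardX when it is actually installed.
    from tensorboardX import SummaryWriter
else:
    # Warn and skip tensorboard logging rather than crash.
    hlog_warn(NO_TENSORBOARDX_WARN_MSG)
```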