Homogeneize logging system #150

Merged · 16 commits · Jul 9, 2024
README.md (1 addition, 1 deletion)

````diff
@@ -42,7 +42,7 @@ Install the dependencies. For the default installation, you just need:
 pip install .
 ```
 
-If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependencies group that fits your use case (`accelerate`,`tgi`,`optimum`,`quantization`,`adapters`,`nanotron`):
+If you want to evaluate models with frameworks like `accelerate` or `peft`, you will need to specify the optional dependencies group that fits your use case (`accelerate`,`tgi`,`optimum`,`quantization`,`adapters`,`nanotron`,`tensorboardX`):
 
 ```bash
 pip install '.[optional1,optional2]'
````
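With the new extras group (added in pyproject.toml below), tensorboard logging installs the same way as the other optional backends, e.g. `pip install '.[accelerate,tensorboardX]'`, following the README's existing `pip install '.[optional1,optional2]'` pattern.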
pyproject.toml (2 additions, 1 deletion)

```diff
@@ -55,7 +55,7 @@ keywords = ["evaluation", "nlp", "llm"]
 dependencies = [
   # Base dependencies
   "transformers>=4.38.0",
-  "huggingface_hub>=0.22.0",
+  "huggingface_hub>=0.23.0",
   "torch>=2.0",
   "GitPython>=3.1.41", # for logging
   "datasets>=2.14.0",
@@ -86,6 +86,7 @@ nanotron = [
   "nanotron",
   "tensorboardX"
 ]
+tensorboardX = ["tensorboardX"]
 quality = ["ruff==v0.2.2","pre-commit"]
 tests = ["pytest==7.4.0"]
 dev = ["lighteval[accelerate,quality,tests]"]
```
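Note the design choice: `tensorboardX` was already pulled in by the `nanotron` extra, but the new standalone `tensorboardX = ["tensorboardX"]` group lets accelerate-based runs opt into tensorboard logging without installing the full nanotron stack.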
run_evals_accelerate.py (1 addition)

```diff
@@ -48,6 +48,7 @@ def get_parser():
     parser.add_argument("--push_results_to_hub", default=False, action="store_true")
     parser.add_argument("--save_details", action="store_true")
     parser.add_argument("--push_details_to_hub", default=False, action="store_true")
+    parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
     parser.add_argument(
         "--public_run", default=False, action="store_true", help="Push results and details to a public repo"
     )
```
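The new flag mirrors the existing `--push_results_to_hub`/`--push_details_to_hub` switches: a run that should also emit tensorboard scalars just adds it, e.g. `python run_evals_accelerate.py ... --push_results_to_tensorboard` (other arguments elided).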
src/lighteval/logging/evaluation_tracker.py (106 additions, 124 deletions)

Large diff not rendered by default.
src/lighteval/main_accelerate.py (10 additions, 4 deletions)

```diff
@@ -56,7 +56,15 @@
 @htrack()
 def main(args):
     env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
-    evaluation_tracker = EvaluationTracker(hub_results_org=args.results_org, token=TOKEN)
+    evaluation_tracker = EvaluationTracker(
+        output_dir=args.output_dir,
+        hub_results_org=args.results_org,
+        push_results_to_hub=args.push_results_to_hub,
+        push_details_to_hub=args.push_details_to_hub,
+        push_results_to_tensorboard=args.push_results_to_tensorboard,
+        public=args.public_run,
+        token=TOKEN,
+    )
     evaluation_tracker.general_config_logger.log_args_info(
         args.num_fewshot_seeds, args.override_batch_size, args.max_samples, args.job_id
     )
@@ -124,9 +132,7 @@ def main(args):
     evaluation_tracker.details_logger.aggregate()
 
     if args.output_dir:
-        evaluation_tracker.save(
-            args.output_dir, args.push_results_to_hub, args.push_details_to_hub, args.public_run
-        )
+        evaluation_tracker.save()
 
     final_dict = evaluation_tracker.generate_final_dict()
```
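This call-site change is the point of the homogenization: options that `save()` used to receive per call now travel through the constructor, so every logging destination is fixed when the tracker is built. Below is a minimal sketch of that shape, assuming the tracker simply stores what it is given; the real `EvaluationTracker` diff (in `src/lighteval/logging/evaluation_tracker.py`) is not rendered above, so everything beyond the keyword-argument names is hypothetical.

```python
from dataclasses import dataclass


@dataclass
class EvaluationTrackerSketch:
    """Hypothetical stand-in: field names mirror the keyword arguments above."""

    output_dir: str
    hub_results_org: str = ""
    push_results_to_hub: bool = False
    push_details_to_hub: bool = False
    push_results_to_tensorboard: bool = False
    public: bool = False
    token: str = ""

    def save(self) -> None:
        # Every destination was fixed at construction time, so call sites
        # shrink from save(output_dir, push_results, push_details, public)
        # to a bare save().
        print(f"writing results under {self.output_dir}")
        if self.push_results_to_hub:
            print(f"pushing results to {self.hub_results_org} (public={self.public})")
        if self.push_details_to_hub:
            print(f"pushing details to {self.hub_results_org}")
        if self.push_results_to_tensorboard:
            print("exporting scalars via tensorboardX")


tracker = EvaluationTrackerSketch(output_dir="./results", push_results_to_hub=True)
tracker.save()
```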
src/lighteval/main_nanotron.py (7 additions, 1 deletion)

```diff
@@ -96,7 +96,13 @@ def main(
         data_parallel_size=lighteval_config.parallelism.dp,
     )
 
-    evaluation_tracker = EvaluationTracker(token=TOKEN)
+    evaluation_tracker = EvaluationTracker(
+        token=TOKEN,
+        output_dir=lighteval_config.logging.local_output_path,
+        hub_results_org=lighteval_config.logging.hub_repo_tensorboard,
+        tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix,
+        nanotron_run_info=nanotron_config.general,
+    )
     evaluation_tracker.general_config_logger.log_args_info(
         num_fewshot_seeds=1,
         override_batch_size=None,
```
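On the nanotron path the same constructor is fed from `lighteval_config.logging` instead of CLI flags, and it additionally receives a `tensorboard_metric_prefix` and the run metadata (`nanotron_config.general`), presumably so scalar tags from different training runs stay distinguishable in tensorboard.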
src/lighteval/utils.py (9 additions)

```diff
@@ -191,6 +191,15 @@ def is_peft_available() -> bool:
 NO_PEFT_ERROR_MSG = "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip."
 
 
+def is_tensorboardX_available() -> bool:
+    return importlib.util.find_spec("tensorboardX") is not None
+
+
+NO_TENSORBOARDX_WARN_MSG = (
+    "You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping."
+)
+
+
 def is_openai_available() -> bool:
     return importlib.util.find_spec("openai") is not None
```
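These helpers follow the same soft-dependency pattern as the existing `is_peft_available` guard. Here is a sketch of how an import site might consume them; only `is_tensorboardX_available` and `NO_TENSORBOARDX_WARN_MSG` come from this diff, while the surrounding wiring is an assumed usage pattern.

```python
import logging

# Both names are added by this PR in src/lighteval/utils.py; the
# import guard around them is an assumed usage pattern, not PR code.
from lighteval.utils import NO_TENSORBOARDX_WARN_MSG, is_tensorboardX_available

logger = logging.getLogger(__name__)

if is_tensorboardX_available():
    # Import lazily so environments without the optional extra still work.
    from tensorboardX import SummaryWriter
else:
    # Degrade gracefully: warn and skip tensorboard export entirely.
    logger.warning(NO_TENSORBOARDX_WARN_MSG)
```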