Skip to content

Commit

Permalink
Add environment and transformers version logging in results dump (Ele…
Browse files Browse the repository at this point in the history
…utherAI#1464)

* Save git_hash to results even if git is not available to call as subprocess

* Store more info about environment and transformers version in results to help researchers track inconsistencies

* Moved the newly added environment logging into logging_utils

* moved get_git_commit_hash to logging_utils.py

* moved add_env_info inside evaluator
  • Loading branch information
LSinev authored and nightingal3 committed May 2, 2024
1 parent 57326eb commit f4ce962
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 29 deletions.
3 changes: 2 additions & 1 deletion lm_eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
import lm_eval.api.metrics
import lm_eval.api.registry
import lm_eval.models
from lm_eval.logging_utils import add_env_info, get_git_commit_hash
from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import (
eval_logger,
get_git_commit_hash,
positional_deprecated,
run_task_tests,
simple_parse_args_string,
Expand Down Expand Up @@ -221,6 +221,7 @@ def simple_evaluate(
"gen_kwargs": gen_kwargs,
}
results["git_hash"] = get_git_commit_hash()
add_env_info(results) # additional environment info to results
return results
else:
return None
Expand Down
65 changes: 60 additions & 5 deletions lm_eval/logging_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import copy
import json
import logging
import os
import re
from typing import Any, Dict, List, Literal, Tuple, Union
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from packaging.version import Version
from torch.utils.collect_env import get_pretty_env_info
from transformers import __version__ as trans_version

from lm_eval import utils
from lm_eval.utils import simple_parse_args_string


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -85,9 +90,7 @@ def __init__(self, args: Any) -> None:
results (Dict[str, Any]): The results dictionary.
args (Any): Arguments for configuration.
"""
self.wandb_args: Dict[str, Any] = utils.simple_parse_args_string(
args.wandb_args
)
self.wandb_args: Dict[str, Any] = simple_parse_args_string(args.wandb_args)

# initialize a W&B run
if wandb.run is None:
Expand Down Expand Up @@ -384,3 +387,55 @@ def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -> None:
self._log_samples_as_artifact(eval_preds, task_name)

self.run.log({f"{group}_eval_results": grouped_df})


def get_commit_from_path(repo_path: Union[Path, str]) -> Optional[str]:
    """
    Read the commit hash that HEAD points to directly from a repository's
    ``.git`` data, without invoking the ``git`` executable.

    Args:
        repo_path: Directory expected to contain a ``.git`` folder, or a
            ``.git`` pointer file (first line ``gitdir: <path>``).

    Returns:
        The commit hash as a string, or ``None`` when there is no readable
        git metadata at ``repo_path`` or HEAD cannot be resolved.
    """
    try:
        git_folder = Path(repo_path, ".git")
        if git_folder.is_file():
            # ".git" is a pointer file; its first line names the real git dir,
            # relative to the directory holding the pointer file.
            first_line = git_folder.read_text(encoding="utf-8").split("\n")[0]
            if first_line.startswith("gitdir:"):
                # keep everything after the prefix so paths with spaces survive
                target = first_line[len("gitdir:"):].strip()
            else:
                target = first_line.split(" ")[-1]
            git_folder = Path(git_folder.parent, target)

        head_file = Path(git_folder, "HEAD")
        if not head_file.exists():
            return None

        head = head_file.read_text(encoding="utf-8").split("\n")[0].strip()
        if not head.startswith("ref:"):
            # Detached HEAD: the file holds the commit hash itself.
            return head or None

        ref_name = head[len("ref:"):].strip()
        ref_file = Path(git_folder, ref_name)
        if ref_file.is_file():
            return ref_file.read_text(encoding="utf-8").replace("\n", "")

        # The ref has no loose file; look it up in packed-refs, whose entries
        # are "<hash> <refname>" (comment and "^" peeled lines never match).
        packed_refs = Path(git_folder, "packed-refs")
        if packed_refs.is_file():
            for line in packed_refs.read_text(encoding="utf-8").splitlines():
                sha, _, name = line.partition(" ")
                if name.strip() == ref_name:
                    return sha
        return None
    except (OSError, UnicodeDecodeError):
        # Best-effort metadata: unreadable git data must not break the caller.
        return None


def get_git_commit_hash():
    """
    Return an identifier for the current commit of the repo in the working
    directory, or whatever the on-disk fallback yields when git cannot be used.
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    """
    try:
        described = subprocess.check_output(["git", "describe", "--always"])
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: git exited with a non-zero status.
        # FileNotFoundError: git is not installed on the system.
        # Either way, read the hash from the .git data of the current directory.
        return get_commit_from_path(os.getcwd())
    return described.strip().decode()


def add_env_info(storage: Dict[str, Any]):
    """
    Add environment details to ``storage`` in place.

    Three keys are written: ``pretty_env_info`` (the report from
    ``torch.utils.collect_env.get_pretty_env_info``, or the error text if
    collecting it fails), ``transformers_version`` and ``upper_git_hash``
    (commit of the parent directory's repo, if any).

    Args:
        storage: Dictionary (e.g. evaluation results) to update.
    """
    try:
        env_report = get_pretty_env_info()
    except Exception as err:
        # best-effort: keep the failure message instead of the report
        env_report = str(err)

    # the parent directory may be a repo that holds this one as a submodule
    parent_dir = Path(os.getcwd(), "..")
    storage.update(
        {
            "pretty_env_info": env_report,
            "transformers_version": trans_version,
            "upper_git_hash": get_commit_from_path(parent_dir),
        }
    )
27 changes: 4 additions & 23 deletions lm_eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,11 @@
import inspect
import logging
import os
import pathlib
import re
import subprocess
import sys
from itertools import islice
from typing import (
Any,
Callable,
List,
)
from pathlib import Path
from typing import Any, Callable, List

import yaml
from jinja2 import BaseLoader, Environment, StrictUndefined
Expand Down Expand Up @@ -291,7 +286,7 @@ def _wrapper(*args, **kwargs):


@positional_deprecated
def find_test_root(start_path: pathlib.Path) -> pathlib.Path:
def find_test_root(start_path: Path) -> Path:
"""
Search upward in the directory tree to a maximum of three layers
to find and return the package root (containing the 'tests' folder)
Expand All @@ -315,7 +310,7 @@ def run_task_tests(task_list: List[str]):
"""
import pytest

package_root = find_test_root(start_path=pathlib.Path(__file__))
package_root = find_test_root(start_path=Path(__file__))
task_string = " or ".join(task_list)
args = [
f"{package_root}/tests/test_version_stable.py",
Expand All @@ -331,20 +326,6 @@ def run_task_tests(task_list: List[str]):
)


def get_git_commit_hash():
    """
    Gets the git commit hash of your current repo (if it exists).
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42

    Returns:
        The output of ``git describe --always`` as a string, or ``None`` when
        git fails or is not installed.
    """
    try:
        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
        git_hash = git_hash.decode()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # The exceptions must be a tuple: ``A or B`` evaluates to ``A`` alone,
        # which left FileNotFoundError (git not installed on system) uncaught.
        git_hash = None
    return git_hash


def ignore_constructor(loader, node):
    """
    Pass-through constructor: hand back ``node`` untouched.

    ``loader`` is accepted but not used.
    NOTE(review): presumably registered as a YAML tag constructor - confirm at the call site.
    """
    return node

Expand Down

0 comments on commit f4ce962

Please sign in to comment.