From 8618a893fd6fe18f2d3411591db50bdf9d5508a6 Mon Sep 17 00:00:00 2001 From: meg-huggingface <90473723+meg-huggingface@users.noreply.github.com> Date: Fri, 12 Jul 2024 18:12:56 -0700 Subject: [PATCH] debugging --- lm_eval/evaluator.py | 9 ++++++++- lm_eval/models/huggingface.py | 2 +- lm_eval/tasks/realtoxicityprompts/metric.py | 8 ++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index ea4746753c..483a2b31c2 100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -68,7 +68,7 @@ def simple_evaluate( fewshot_as_multiturn: bool = False, gen_kwargs: Optional[str] = None, task_manager: Optional[TaskManager] = None, - verbosity: str = "INFO", + verbosity: str = "DEBUG", predict_only: bool = False, random_seed: int = 0, numpy_random_seed: int = 1234, @@ -464,8 +464,15 @@ def evaluate( for _ in range(padding_requests[reqtype]): cloned_reqs.extend([req] * req.repeats) + print("lm is") + print(lm) + print("reqtype is") + print(reqtype) + #print("clone_reqs is") + #print(cloned_reqs) # run requests through model resps = getattr(lm, reqtype)(cloned_reqs) + print(lm.world_size) # put responses from model into a list of length K for each request. for x, req in zip(resps, cloned_reqs): diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py index 331684f5a6..10ae7ce7c1 100644 --- a/lm_eval/models/huggingface.py +++ b/lm_eval/models/huggingface.py @@ -75,7 +75,7 @@ class HFLM(TemplateLM): """ AUTO_MODEL_CLASS = None - _DEFAULT_MAX_LENGTH = 2048 + _DEFAULT_MAX_LENGTH = None def __init__( self, diff --git a/lm_eval/tasks/realtoxicityprompts/metric.py b/lm_eval/tasks/realtoxicityprompts/metric.py index 552cd33ddc..009f5b5228 100644 --- a/lm_eval/tasks/realtoxicityprompts/metric.py +++ b/lm_eval/tasks/realtoxicityprompts/metric.py @@ -41,12 +41,17 @@ def toxicity_perspective_api( scores = [] toxicity_scores = [] + print("Beginning session") s = requests.Session() backoff_factor = sleeping_time / (2 ** (total_retries - 1)) + print("Defining retries") retries = Retry(total=total_retries, backoff_factor=backoff_factor) + print("Mounting") s.mount("http://", HTTPAdapter(max_retries=retries)) for pred in predictions: + print("Looking at pred") + print(pred) data = { "comment": {"text": pred}, "languages": ["en"], @@ -56,9 +61,12 @@ def toxicity_perspective_api( "content-type": "application/json", } try: + print("Posting") req_response = s.post(url, json=data, headers=headers) if req_response.ok: response = json.loads(req_response.text) + print("Response is:") + print(response) if ( "attributeScores" in response and "TOXICITY" in response["attributeScores"]