Skip to content

Commit

Permalink
Merge branch 'SN1-423-restructure-prompting' into 'SN1-419-r-d-resear…
Browse files Browse the repository at this point in the history
…ch-better-scoring-mechanisms-for-msr'
  • Loading branch information
richwardle committed Mar 4, 2025
2 parents 1fad896 + 0245a2a commit 46fe23a
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 105 deletions.
4 changes: 2 additions & 2 deletions prompting/rewards/inference_reward_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ async def reward(
) -> BatchRewardOutput:
"""Gives an exact reward of 1 if the response matches the reference, 0 otherwise"""
if model_id:
return ExactMatchRewardModel().reward(reference, response_event)
return RelevanceRewardModel().reward(reference, response_event)
return await ExactMatchRewardModel().reward(reference, response_event)
return await RelevanceRewardModel().reward(reference, response_event)
File renamed without changes.
125 changes: 125 additions & 0 deletions shared/prompts/test_time_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import textwrap


def intro_prompt() -> str:
    """Return the system prompt that initiates structured step-by-step reasoning.

    The prompt instructs the model to emit each reasoning step as a JSON
    object with ``title``, ``content`` and ``next_action`` fields.

    Returns:
        str: The full system prompt, stripped of trailing whitespace.
    """
    # Fix: the worked example previously read "{Expected Answer:" and closed
    # with "}}" — stray/mismatched braces (the pre-refactor inline prompt used
    # "Expected Answer:" followed by a single-brace JSON object).
    intro = textwrap.dedent(
        """\
        You are a world-class expert in analytical reasoning and problem-solving. Your task is to break down complex problems through rigorous step-by-step analysis, carefully examining each aspect before moving forward. For each reasoning step:
        OUTPUT FORMAT:
        Return a JSON object with these required fields:
        {
            "title": "Brief, descriptive title of current reasoning phase",
            "content": "Detailed explanation of your analysis",
            "next_action": "continue" or "final_answer"
        }
        REASONING PROCESS:
        1. Initial Analysis
        - Break down the problem into core components
        - Identify key constraints and requirements
        - List relevant domain knowledge and principles
        2. Multiple Perspectives
        - Examine the problem from at least 3 different angles
        - Consider both conventional and unconventional approaches
        - Identify potential biases in initial assumptions
        3. Exploration & Validation
        - Test preliminary conclusions against edge cases
        - Apply domain-specific best practices
        - Quantify confidence levels when possible (e.g., 90% certain)
        - Document key uncertainties or limitations
        4. Critical Review
        - Actively seek counterarguments to your reasoning
        - Identify potential failure modes
        - Consider alternative interpretations of the data/requirements
        - Validate assumptions against provided context
        5. Synthesis & Refinement
        - Combine insights from multiple approaches
        - Strengthen weak points in the reasoning chain
        - Address identified edge cases and limitations
        - Build towards a comprehensive solution
        REQUIREMENTS:
        - Each step must focus on ONE specific aspect of reasoning
        - Explicitly state confidence levels and uncertainty
        - When evaluating options, use concrete criteria
        - Include specific examples or scenarios when relevant
        - Acknowledge limitations in your knowledge or capabilities
        - Maintain logical consistency across steps
        - Build on previous steps while avoiding redundancy
        CRITICAL THINKING CHECKLIST:
        ✓ Have I considered non-obvious interpretations?
        ✓ Are my assumptions clearly stated and justified?
        ✓ Have I identified potential failure modes?
        ✓ Is my confidence level appropriate given the evidence?
        ✓ Have I adequately addressed counterarguments?
        Remember: Quality of reasoning is more important than speed. Take the necessary steps to build a solid analytical foundation before moving to conclusions.
        Example:
        User Query: How many piano tuners are in New York City?
        Expected Answer:
        {
            "title": "Estimating the Number of Piano Tuners in New York City",
            "content": "To estimate the number of piano tuners in NYC, we need to break down the problem into core components. Key factors include the total population of NYC, the number of households with pianos, the average number of pianos per household, and the frequency of piano tuning. We should also consider the number of professional piano tuners and their workload.",
            "next_action": "continue"
        }
        """
    ).strip()

    return intro


def system_acceptance_prompt() -> str:
    """Return the assistant's acknowledgement message.

    Injected as the assistant's first reply so the model commits to the
    structured reasoning process before the conversation continues.

    Returns:
        str: A single-sentence acceptance statement.
    """
    return (
        "I understand. I will now analyze the problem systematically, "
        "following the structured reasoning process while maintaining "
        "high standards of analytical rigor and self-criticism."
    )


def final_answer_prompt() -> str:
    """Return the prompt that asks the model to synthesize a final answer.

    Returns:
        str: The final-answer instruction, including a JSON template the
        model must follow, stripped of trailing whitespace.
    """
    # Fix: the JSON template previously used "{{" / "}}" — str.format-style
    # brace escaping — but this string is returned as-is (never formatted)
    # and sent directly as message content, so the model would receive
    # literal double braces. Single braces restore the intended template.
    final_answer = textwrap.dedent(
        """\
        Review your previous reasoning steps and synthesize them into a final answer.
        Your response should:
        1. Clearly state your final conclusion.
        2. Summarize the key reasoning and evidence from previous steps.
        3. Address any remaining uncertainties or alternative perspectives.
        4. Note any relevant caveats or limitations to your conclusion.
        Ensure the response is concise, well-structured, and avoids unnecessary repetition.
        Do not include explicit confidence levels or probabilities.
        Format your response as valid JSON:
        {
            "title": "Final Answer",
            "content": "Your synthesized conclusion and explanation here.",
            "next_action": "final_answer"
        }
        """
    ).strip()

    return final_answer
29 changes: 29 additions & 0 deletions tests/prompting/shared/test_get_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from shared.prompts.test_time_inference import final_answer_prompt, intro_prompt, system_acceptance_prompt


def test_intro_prompt():
    """intro_prompt returns a string containing every major prompt section."""
    prompt = intro_prompt()
    assert isinstance(prompt, str)
    expected_sections = (
        "You are a world-class expert in analytical reasoning",
        "OUTPUT FORMAT:",
        "REASONING PROCESS:",
        "REQUIREMENTS:",
        "CRITICAL THINKING CHECKLIST:",
    )
    for section in expected_sections:
        assert section in prompt


def test_system_acceptance_prompt():
    """system_acceptance_prompt returns the acknowledgement string."""
    result = system_acceptance_prompt()
    assert isinstance(result, str)
    expected_fragment = "I understand. I will now analyze the problem systematically"
    assert expected_fragment in result


def test_final_answer_prompt():
    """final_answer_prompt returns the synthesis prompt with JSON fields."""
    prompt = final_answer_prompt()
    assert isinstance(prompt, str)
    required_fragments = [
        "Review your previous reasoning steps",
        "Format your response as valid JSON",
        '"title":',
        '"content":',
    ]
    assert all(fragment in prompt for fragment in required_fragments)
30 changes: 12 additions & 18 deletions validator_api/gpt_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,13 @@ async def completions(request: CompletionsRequest, api_key: str = Depends(valida

# Choose between regular inference, test time inference, and mixture of miners.
if body.get("test_time_inference", False):
return await test_time_inference(request)
test_time_request = TestTimeInferenceRequest(
messages=request.messages,
model=request.model,
uids=uids if uids else None,
json_format=request.json_format,
)
return await test_time_inference(test_time_request)
elif body.get("mixture", False):
return await mixture_of_miners(body, uids=uids)
else:
Expand Down Expand Up @@ -230,20 +236,6 @@ async def web_retrieval(
return WebRetrievalResponse(results=unique_results)


@router.post(
"/test_time_inference",
summary="Test time inference endpoint",
description="Provides step-by-step reasoning and thinking process during inference.",
response_description="Streaming response with reasoning steps",
status_code=status.HTTP_200_OK,
responses={
status.HTTP_200_OK: {
"description": "Successful streaming response with reasoning steps",
"content": {"text/event-stream": {}},
},
status.HTTP_500_INTERNAL_SERVER_ERROR: {"description": "Internal server error during streaming"},
},
)
async def test_time_inference(request: TestTimeInferenceRequest):
"""
Test time inference endpoint that provides step-by-step reasoning.
Expand Down Expand Up @@ -289,14 +281,16 @@ async def stream_steps():
i = 0
async for steps, thinking_time in create_response_stream(request):
i += 1
if request.json_format:
choice = Choice(index=i, delta=ChoiceDelta(content=json.dumps(steps[-1])))
else:
choice = Choice(index=i, delta=ChoiceDelta(content=f"## {steps[-1][0]}\n\n{steps[-1][1]}" + "\n\n"))
yield "data: " + ChatCompletionChunk(
id=str(uuid.uuid4()),
created=int(time.time()),
model=request.model or "None",
object="chat.completion.chunk",
choices=[
Choice(index=i, delta=ChoiceDelta(content=f"## {steps[-1][0]}\n\n{steps[-1][1]}" + "\n\n"))
],
choices=[choice],
).model_dump_json() + "\n\n"
except Exception as e:
logger.exception(f"Error during streaming: {e}")
Expand Down
2 changes: 2 additions & 0 deletions validator_api/scoring_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ async def run_step(self):
# Raise an exception so that the retry logic in the except block handles it.
raise Exception(f"Non-200 response: {response.status_code} for uids {uids}")
logger.debug(f"Forwarding response completed with status {response.status_code}")
except httpx.ConnectError as e:
logger.warning(f"Couldn't connect to validator {url} for Scoring {uids}. Exception: {e}")
except Exception as e:
if scoring_payload.retries < self.max_scoring_retries:
scoring_payload.retries += 1
Expand Down
2 changes: 2 additions & 0 deletions validator_api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class CompletionsRequest(BaseModel):
"do_sample": True,
},
)
json_format: bool = Field(default=False, description="Enable JSON format for the response.", example=True)


class WebRetrievalRequest(BaseModel):
Expand Down Expand Up @@ -107,6 +108,7 @@ class TestTimeInferenceRequest(BaseModel):
example=[{"role": "user", "content": "Solve the equation: 3x + 5 = 14"}],
)
model: Optional[str] = Field(default=None, description="Model identifier to use for inference.", example="gpt-4")
json_format: bool = Field(default=False, description="Enable JSON format for the response.", example=True)

def to_dict(self):
return self.model_dump().update({"messages": [m.model_dump() for m in self.messages]})
89 changes: 4 additions & 85 deletions validator_api/test_time_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from prompting.llms.apis.llm_messages import LLMMessage, LLMMessages
from prompting.llms.apis.llm_wrapper import LLMWrapper
from shared.prompts.test_time_inference import final_answer_prompt, intro_prompt, system_acceptance_prompt
from shared.timer import Timer
from validator_api.chat_completion import chat_completion

Expand Down Expand Up @@ -168,81 +169,14 @@ async def generate_response(
messages = [
{
"role": "system",
"content": """You are a world-class expert in analytical reasoning and problem-solving. Your task is to break down complex problems through rigorous step-by-step analysis, carefully examining each aspect before moving forward. For each reasoning step:
OUTPUT FORMAT:
Return a JSON object with these required fields:
{
"title": "Brief, descriptive title of current reasoning phase",
"content": "Detailed explanation of your analysis",
"next_action": "continue" or "final_answer"
}
REASONING PROCESS:
1. Initial Analysis
- Break down the problem into core components
- Identify key constraints and requirements
- List relevant domain knowledge and principles
2. Multiple Perspectives
- Examine the problem from at least 3 different angles
- Consider both conventional and unconventional approaches
- Identify potential biases in initial assumptions
3. Exploration & Validation
- Test preliminary conclusions against edge cases
- Apply domain-specific best practices
- Quantify confidence levels when possible (e.g., 90% certain)
- Document key uncertainties or limitations
4. Critical Review
- Actively seek counterarguments to your reasoning
- Identify potential failure modes
- Consider alternative interpretations of the data/requirements
- Validate assumptions against provided context
5. Synthesis & Refinement
- Combine insights from multiple approaches
- Strengthen weak points in the reasoning chain
- Address identified edge cases and limitations
- Build towards a comprehensive solution
REQUIREMENTS:
- Each step must focus on ONE specific aspect of reasoning
- Explicitly state confidence levels and uncertainty
- When evaluating options, use concrete criteria
- Include specific examples or scenarios when relevant
- Acknowledge limitations in your knowledge or capabilities
- Maintain logical consistency across steps
- Build on previous steps while avoiding redundancy
CRITICAL THINKING CHECKLIST:
✓ Have I considered non-obvious interpretations?
✓ Are my assumptions clearly stated and justified?
✓ Have I identified potential failure modes?
✓ Is my confidence level appropriate given the evidence?
✓ Have I adequately addressed counterarguments?
Remember: Quality of reasoning is more important than speed. Take the necessary steps to build a solid analytical foundation before moving to conclusions.
Example:
User Query: How many piano tuners are in New York City?
Expected Answer:
{
"title": "Estimating the Number of Piano Tuners in New York City",
"content": "To estimate the number of piano tuners in NYC, we need to break down the problem into core components. Key factors include the total population of NYC, the number of households with pianos, the average number of pianos per household, and the frequency of piano tuning. We should also consider the number of professional piano tuners and their workload.",
"next_action": "continue"
}
""",
"content": intro_prompt(),
}
]
messages += original_messages
messages += [
{
"role": "assistant",
"content": "I understand. I will now analyze the problem systematically, following the structured reasoning process while maintaining high standards of analytical rigor and self-criticism.",
"content": system_acceptance_prompt(),
}
]

Expand All @@ -257,7 +191,6 @@ async def generate_response(
total_thinking_time += thinking_time

steps.append((f"Step {step_count}: {step_data['title']}", step_data["content"], thinking_time))

messages.append({"role": "assistant", "content": json.dumps(step_data)})

if step_data["next_action"] == "final_answer" or not step_data.get("next_action"):
Expand All @@ -266,24 +199,10 @@ async def generate_response(
step_count += 1
yield steps, None

final_answer_prompt = """Based on your thorough analysis, please provide your final answer. Your response should:
1. Clearly state your conclusion
2. Summarize the key supporting evidence
3. Acknowledge any remaining uncertainties
4. Include relevant caveats or limitations
5. Synthesis & Refinement
Ensure your response uses the correct json format as follows:
{
"title": "Final Answer",
"content": "Conclusion and detailed explanation of your answer",
}"""

messages.append(
{
"role": "user",
"content": final_answer_prompt,
"content": final_answer_prompt(),
}
)

Expand Down

0 comments on commit 46fe23a

Please sign in to comment.