add explanation

This commit is contained in:
2026-01-14 13:31:07 +08:00
parent a0ad19449a
commit f9ac34498c

View File

@@ -40,19 +40,21 @@ class Validator:
api_key=self.config.api_key api_key=self.config.api_key
) )
def default_correct(self, inputs: dict, outputs: dict, reference_outputs: dict) -> bool: def default_correct(self, inputs: dict, outputs: dict, reference_outputs: dict) -> dict:
instructions = ( instructions = (
"Given an actual answer and an expected answer, determine whether" "Given an actual answer and an expected answer, determine whether"
" the actual answer contains all of the information in the" " the actual answer contains all of the information in the"
" expected answer. Respond with 'CORRECT' if the actual answer" " expected answer. First provide your reasoning, then respond with"
" does contain all of the expected information and 'INCORRECT'" " your final judgment.\n\n"
" otherwise. Do not include anything else in your response." "Format your response EXACTLY as follows:\n"
"EXPLANATION: <your reasoning here>\n"
"JUDGMENT: <CORRECT or INCORRECT>"
) )
actual_answer = outputs["output"][-1].content actual_answer = outputs["output"][-1].content
expected_answer = reference_outputs["answer"] expected_answer = reference_outputs["answer"]
if expected_answer is None: if expected_answer is None:
return True return {"score": True, "comment": "No expected answer provided, auto-pass."}
user_msg = ( user_msg = (
f"ACTUAL ANSWER: {actual_answer}" f"ACTUAL ANSWER: {actual_answer}"
@@ -66,7 +68,24 @@ class Validator:
] ]
) )
return response.content.upper() == "CORRECT" response_text = response.content
# Parse the explanation and judgment from the response
explanation = ""
is_correct = False
if "EXPLANATION:" in response_text:
parts = response_text.split("JUDGMENT:")
explanation = parts[0].replace("EXPLANATION:", "").strip()
if len(parts) > 1:
judgment = parts[1].strip().upper()
is_correct = "CORRECT" in judgment and "INCORRECT" not in judgment
else:
# Fallback: check if response contains CORRECT/INCORRECT
explanation = response_text
is_correct = "CORRECT" in response_text.upper() and "INCORRECT" not in response_text.upper()
return {"score": is_correct, "comment": explanation}
def val_tool_use(self, inputs:dict, outputs:dict, reference_outputs:dict)->float: def val_tool_use(self, inputs:dict, outputs:dict, reference_outputs:dict)->float:
tool_uses:List[str] = reference_outputs.get("tool_use") tool_uses:List[str] = reference_outputs.get("tool_use")