This commit is contained in:
jijiahao
2025-10-30 16:28:53 +08:00
4 changed files with 82 additions and 15 deletions

View File

@@ -50,8 +50,42 @@ class InstantiateConfig(PrintableConfig):
def save_config(self, filename: str) -> None:
    """Save the config to a YAML file with secret-like values masked.

    The config is first converted into plain containers (dataclass
    instances become dicts keyed by attribute name). Every string value
    whose key satisfies ``self.is_secrete(key)`` is replaced by a masked
    form. Only the masked copy is written; the raw config object is never
    dumped, since doing so would put the unmasked secrets in the file.

    Args:
        filename: Path of the YAML file to write (overwritten if present).
    """

    def mask_value(key, value):
        # Only string values stored under a secret-like key are masked.
        if not (isinstance(value, str) and self.is_secrete(key)):
            return value
        # With six or fewer characters the leading/trailing three-character
        # slices would reproduce the whole secret, so hide it entirely.
        if len(value) <= 6:
            return "*" * len(value)
        return value[:3] + "*" * (len(value) - 6) + value[-3:]

    def to_masked_serializable(obj):
        # Recursively convert dataclass instances and containers into
        # serializable structures with masked secrets.
        if is_dataclass(obj) and not isinstance(obj, type):
            # is_dataclass() is also true for dataclass *classes*; only
            # instances carry field values worth walking.
            items = vars(obj).items()
        elif isinstance(obj, dict):
            items = obj.items()
        elif isinstance(obj, list):
            return [to_masked_serializable(v) for v in obj]
        elif isinstance(obj, tuple):
            return tuple(to_masked_serializable(v) for v in obj)
        else:
            return obj

        out = {}
        for k, v in items:
            if is_dataclass(v) or isinstance(v, (dict, list, tuple)):
                out[k] = to_masked_serializable(v)
            else:
                # k might be a non-string (dict keys); convert to str so the
                # is_secrete check always receives a string.
                out[k] = mask_value(str(k), v)
        return out

    masked = to_masked_serializable(self)
    with open(filename, 'w', encoding="utf-8") as f:
        yaml.dump(masked, f)
    logger.info(f"[yellow]config saved to: {filename}[/yellow]")
def get_name(self):

View File

@@ -25,7 +25,7 @@ class EvaluatorConfig(InstantiateConfig):
experiment_desc:str = "testing if this works or not"
"""describe the experiment"""
dataset_name:Literal["Toxic Queries"] = "dev_langagent"
dataset_name:Literal["Toxic Queries"] = "xiao_zhan"
"""name of the dataset to evaluate"""
log_dir:str = "logs"
@@ -73,7 +73,7 @@ class Evaluator:
head_path = osp.join(self.config.log_dir, f"{self.dataset.name}-{self.config.experiment_prefix}")
n_exp = len(glob.glob(f"{head_path}*"))
exp_save_f = osp.join(f"{head_path}-{n_exp}.csv")
exp_save_f = f"{head_path}-{n_exp}.csv"
df = self.result.to_pandas()
logger.info(f"saving experiment results to: {exp_save_f}")
@@ -93,4 +93,6 @@ class Evaluator:
df_m.to_csv(metric_f)
self.config.save_config(f"{head_path}-{n_exp}.yml")

View File

@@ -51,6 +51,9 @@ class Validator:
actual_answer = outputs["output"][-1].content
expected_answer = reference_outputs["answer"]
if expected_answer is None:
return True
user_msg = (
f"ACTUAL ANSWER: {actual_answer}"
f"\n\nEXPECTED ANSWER: {expected_answer}"
@@ -77,7 +80,7 @@ class Validator:
st_cond = False
ref_tool = ref_tool.lower()
for msg in tool_msgs:
st_cond = ref_tool in msg.name.lower()
st_cond = msg.name.lower() in ref_tool
if st_cond:
break
tool_used.append(st_cond)