diff --git a/lang_agent/config.py b/lang_agent/config.py
index dd9d307..c95499b 100644
--- a/lang_agent/config.py
+++ b/lang_agent/config.py
@@ -50,8 +50,39 @@ class InstantiateConfig(PrintableConfig):
 
     def save_config(self, filename: str) -> None:
         """Save the config to a YAML file."""
+        def mask_value(key, value):
+            """Return value, masked if it is a string stored under a secret-like key."""
+            if not (isinstance(value, str) and self.is_secrete(key)):
+                return value
+            if len(value) <= 6:
+                # Too short to show 3 leading + 3 trailing chars without leaking it all
+                return "*" * len(value)
+            return value[:3] + "*" * (len(value) - 6) + value[-3:]
+
+        def to_masked_serializable(obj):
+            """Recursively convert dataclasses/containers to plain data with secrets masked."""
+            if is_dataclass(obj) and not isinstance(obj, type):
+                items = vars(obj).items()
+            elif isinstance(obj, dict):
+                items = obj.items()
+            elif isinstance(obj, list):
+                return [to_masked_serializable(v) for v in obj]
+            elif isinstance(obj, tuple):
+                return tuple(to_masked_serializable(v) for v in obj)
+            else:
+                return obj
+            out = {}
+            for k, v in items:
+                if is_dataclass(v) or isinstance(v, (dict, list, tuple)):
+                    out[k] = to_masked_serializable(v)
+                else:
+                    # k might be a non-string; convert to str for is_secrete check consistency
+                    out[k] = mask_value(str(k), v)
+            return out
+
+        masked = to_masked_serializable(self)
         with open(filename, 'w') as f:
-            yaml.dump(self, f)
+            yaml.dump(masked, f)
         logger.info(f"[yellow]config saved to: {filename}[/yellow]")
 
     def get_name(self):
diff --git a/lang_agent/eval/evaluator.py b/lang_agent/eval/evaluator.py
index 72c39be..596e994 100644
--- a/lang_agent/eval/evaluator.py
+++ b/lang_agent/eval/evaluator.py
@@ -25,7 +25,7 @@ class EvaluatorConfig(InstantiateConfig):
     experiment_desc:str = "testing if this works or not"
     """describe the experiment"""
 
-    dataset_name:Literal["Toxic Queries"] = "dev_langagent"
+    dataset_name:Literal["Toxic Queries", "xiao_zhan"] = "xiao_zhan"
     """name of the dataset to evaluate"""
 
     log_dir:str = "logs"
@@ -73,7 +73,7 @@ class Evaluator:
         head_path = osp.join(self.config.log_dir, f"{self.dataset.name}-{self.config.experiment_prefix}")
         n_exp = len(glob.glob(f"{head_path}*"))
 
-        exp_save_f = osp.join(f"{head_path}-{n_exp}.csv")
+        exp_save_f = f"{head_path}-{n_exp}.csv"
         df = self.result.to_pandas()
 
         logger.info(f"saving experiment results to: {exp_save_f}")
@@ -93,4 +93,6 @@ class Evaluator:
 
         df_m.to_csv(metric_f)
 
+        self.config.save_config(f"{head_path}-{n_exp}.yml")
+
 
diff --git a/lang_agent/eval/validator.py b/lang_agent/eval/validator.py
index f680cb1..3c70862 100644
--- a/lang_agent/eval/validator.py
+++ b/lang_agent/eval/validator.py
@@ -51,6 +51,9 @@ class Validator:
         actual_answer = outputs["output"][-1].content
         expected_answer = reference_outputs["answer"]
 
+        if expected_answer is None:
+            return True
+
         user_msg = (
             f"ACTUAL ANSWER: {actual_answer}"
             f"\n\nEXPECTED ANSWER: {expected_answer}"
@@ -77,7 +80,7 @@ class Validator:
             st_cond = False
             ref_tool = ref_tool.lower()
             for msg in tool_msgs:
-                st_cond = ref_tool in msg.name.lower()
+                st_cond = (msg.name or "").lower() in ref_tool.split("|")
                 if st_cond:
                     break
             tool_used.append(st_cond)
diff --git a/scripts/make_eval_dataset.py b/scripts/make_eval_dataset.py
index c008c8b..46aaae1 100644
--- a/scripts/make_eval_dataset.py
+++ b/scripts/make_eval_dataset.py
@@ -2,23 +2,51 @@
 from langsmith import Client
 from loguru import logger
 
-DATASET_NAME = "dev_langagent"
+DATASET_NAME = "xiao_zhan"
 
 examples = [
     {
-        "inputs": {"text": "介绍一下自己"},
-        "outputs": {"answer": "我是小盏,是一个点餐助手"}
-    },
-    {
-        "inputs": {"text": "用retrieve查询光予尘然后介绍"},
-        "outputs": {"answer": "茉莉绿茶为底,清冽茶香中漫出玫珑蜜瓜的绵甜与凤梨的明亮果香,层次鲜活;顶部白柚茉莉泡沫轻盈漫过舌尖,带着微酸的清新感,让整体风味更显灵动",
+        "inputs": {"text": "今天有点热,推荐点茶喝一下"},
+        "outputs": {"answer": None,
                     "tool_use": ["retrieve"]}
     },
     {
-        "inputs": {"text": ["我叫Steve",
-                            "我叫什么名字?"]}, # list for conversation
-        "outputs": {"answer": "你叫Steve"}
-    }
+        "inputs": {"text": "有没有光予尘?"},
+        "outputs": {"answer": "有的",
+                    "tool_use": ["retrieve|get_resource"]}
+    },
+    {
+        "inputs": {"text": "有没有关羽尘?"},
+        "outputs": {"answer": "有的",
+                    "tool_use": ["retrieve|get_resource"]}
+    },
+    {
+        "inputs": {"text": ["我要购买一杯野星星",
+                            "我要再加一杯"]},
+        "outputs": {"answer": "你的野星星已经下单成功",
+                    "tool_use": ["retrieve|get_resources",
+                                 "start_shopping_session",
+                                 "add_to_cart",
+                                 "create_wechat_pay",
+                                 "create_order_from_cart",
+                                 "update_cart_item"]}
+    },
+    {
+        "inputs": {"text": ["我要购买一杯野星星",
+                            "我现在点了些什么?"]},
+        "outputs": {"answer": "一杯野星星",
+                    "tool_use": ["query_wechat_order"]}
+    },
+    {
+        "inputs": {"text": ["我要购买三杯野星星",
+                            "现在取消所有我定了的饮品"]},
+        "outputs": {"answer": "取消成功",
+                    "tool_use": ["clear_cart"]}
+    },
+    {
+        "inputs": {"text": "你是谁?"},
+        "outputs": {"answer": "小盏"}
+    },
 ]
 
 cli = Client()