# Files
# bzg_skills/config.yaml
# 2026-03-09 22:03:09 +08:00
#
# 112 lines
# 1.9 KiB
# YAML

# Scheduled-task configuration: whether the job is enabled and when it runs.
scheduler:
  enabled: true
  # Kept quoted so the time of day is always loaded as a string.
  time: "09:00"
  # Days on which the job runs (Monday through Friday).
  days:
    - mon
    - tue
    - wed
    - thu
    - fri
  # NOTE(review): these two keys look like per-job scheduler options
  # (concurrent-run cap and merging of missed runs) - confirm against the
  # code that consumes this section.
  max_instances: 3
  coalesce: true
# Target website configuration: one entry per site.
# Each entry carries a display name, a base URL, the list-page paths on that
# site, a keyword list, and an enabled flag.
# NOTE(review): how `url` and `list_paths` are joined is not visible here;
# `url` ends with "/" and every path starts with "/" - confirm the consumer
# does not produce a double slash.
targets:
  - name: "国家税务总局"
    url: "https://www.chinatax.gov.cn/"
    list_paths:
      - "/npsite/chinatax/zcwj/"
      - "/npsite/chinatax/tzgg/"
    keywords:
      - "最新"
      - "通知"
      - "公告"
      - "政策"
      - "法规"
    enabled: true
  - name: "财政部"
    url: "https://www.mof.gov.cn/"
    list_paths:
      - "/zhengwugongkai/zhengceku/zhengcefagui/"
    keywords:
      - "最新"
      - "通知"
      - "公告"
      - "政策"
      - "法规"
    enabled: false
  # NOTE(review): this entry points to the same host as the first target,
  # over plain http and with a different list path; it is disabled. Confirm
  # whether it is still needed.
  - name: "国家税务局"
    url: "http://www.chinatax.gov.cn/"
    list_paths:
      - "/cloudfw/zcwj/"
    keywords:
      - "最新"
      - "通知"
      - "公告"
      - "政策"
      - "法规"
    enabled: false
# Download configuration: destination directory, accepted file formats, and
# request limits.
download:
  path: "./downloads"
  # File extensions that are eligible for download.
  formats:
    - pdf
    - doc
    - docx
    - txt
    - xlsx
  # 52428800 = 50 * 1024 * 1024; presumably a byte limit (50 MiB) - confirm.
  max_size: 52428800
  # Presumably seconds - confirm against the HTTP client that reads it.
  timeout: 60
  retry: 3
  user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
# De-duplication configuration: similarity thresholds for titles and content,
# and the hashing algorithm to use.
# NOTE(review): thresholds look like ratios in the 0-1 range - confirm how
# the consumer compares against them (>= vs >).
deduplication:
  title_similarity: 0.8
  content_similarity: 0.9
  hash_algorithm: "simhash"
# Classification configuration: each category has a name, the keywords
# associated with it, and a numeric priority.
# NOTE(review): priorities run 1, 2, 3, 99 - presumably lower numbers are
# matched first and the keyword-less last entry is the fallback; confirm.
categories:
  - name: "税收政策"
    keywords:
      - "税收"
      - "税务"
      - "纳税"
      - "税费"
      - "增值税"
      - "所得税"
    priority: 1
  - name: "通知公告"
    keywords:
      - "通知"
      - "公告"
      - "通告"
    priority: 2
  - name: "法规文件"
    keywords:
      - "法规"
      - "条例"
      - "规章"
      - "办法"
      - "细则"
    priority: 3
  # Explicit empty list (not a bare key) so the value is a list, not null.
  - name: "其他政策"
    keywords: []
    priority: 99
# Logging configuration: level, record format, log file path, and rotation
# limits.
logging:
  level: "INFO"
  # Uses %(name)s-style placeholders as in Python's logging format strings.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "./logs/policy_retrieval.log"
  # 10485760 = 10 * 1024 * 1024; presumably the size in bytes at which the
  # log file rotates - confirm against the handler setup.
  max_bytes: 10485760
  # Presumably the number of rotated files to keep - confirm.
  backup_count: 5