first commit

This commit is contained in:
2026-03-04 17:23:52 +08:00
parent 26a0b3507d
commit cfabc52026
41 changed files with 0 additions and 0 deletions

View File

Binary file not shown.

View File

@@ -0,0 +1,156 @@
import pyautogui
import time
import subprocess
from pathlib import Path
from typing import Optional, Dict, Any, List
BASE_DIR = Path(__file__).parent.parent.parent
import sys
sys.path.insert(0, str(BASE_DIR))
from wechat_auto.config import settings
from wechat_auto.utils.logger import logger
from wechat_auto.utils.retry import sync_retry
from wechat_auto.models.activity import ActivityModel
class PyAutoGUIExecutor:
    """Publish an activity by replaying a fixed sequence of pyautogui actions.

    Every click target inside WeChat is derived from the hard-coded window
    geometry in ``self.wechat_window``; the desktop-icon click uses absolute
    screen coordinates. A screenshot is attempted after each step.
    """

    def __init__(self):
        """Apply pyautogui settings and make sure the screenshot directory exists."""
        pyautogui.FAILSAFE = settings.failsafe
        pyautogui.PAUSE = settings.click_pause
        self.screenshot_dir = Path(settings.screenshot_dir)
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)
        # Known WeChat window geometry, obtained from xwininfo.
        self.wechat_window = {
            'x': 877,
            'y': 207,
            'width': 980,
            'height': 710
        }

    def _window_point(self, rel_x: float, rel_y: float) -> tuple:
        """Convert window-relative fractions into absolute ``(x, y)`` screen coordinates.

        Args:
            rel_x: Horizontal fraction of the window width.
            rel_y: Vertical fraction of the window height.
        """
        win = self.wechat_window
        return (
            win['x'] + int(win['width'] * rel_x),
            win['y'] + int(win['height'] * rel_y),
        )

    def click_at(self, x: int, y: int, button: str = 'left'):
        """Click at the given absolute screen position and log it."""
        pyautogui.click(x, y, button=button)
        logger.info(f"点击坐标:({x}, {y})")

    def screenshot(self, name: Optional[str] = None):
        """Save a best-effort screenshot via ``scrot``.

        The file is named ``<name or 'action'>_<timestamp>.png`` inside
        ``self.screenshot_dir``. Failures never propagate (screenshots are
        diagnostic only) but are logged as warnings instead of being
        silently swallowed.
        """
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"{name or 'action'}_{timestamp}.png"
        filepath = self.screenshot_dir / filename
        try:
            result = subprocess.run(['scrot', str(filepath)], capture_output=True, timeout=5)
        except (OSError, subprocess.TimeoutExpired) as e:
            # scrot missing / not executable, or it hung past the timeout.
            logger.warning(f"截图失败:{filepath} ({e})")
            return
        if result.returncode != 0:
            logger.warning(f"截图失败:{filepath} (scrot 退出码 {result.returncode})")
            return
        logger.info(f"截图:{filepath}")

    def _input_text(self, text: str):
        """Type ``text`` through pyautogui and log its first 30 characters."""
        # NOTE(review): pyautogui.write only presses keys it has a keyboard
        # mapping for; non-ASCII text (e.g. Chinese titles) is likely dropped.
        # Confirm, and consider a clipboard paste instead.
        pyautogui.write(text, interval=0.05)
        logger.info(f"输入文本:{text[:30]}...")

    def _wait(self, seconds: float = 1.0):
        """Sleep for ``seconds``."""
        time.sleep(seconds)

    @sync_retry(max_retries=2, base_delay=2.0)
    def execute(self, activity: ActivityModel) -> Dict[str, Any]:
        """Run every publish step in order.

        Args:
            activity: The activity whose ``title`` and ``content`` are typed
                into the form.

        Returns:
            ``{"status": "success", "method": "pyautogui"}`` when all steps ran.

        Raises:
            Exception: Whatever a step raised, re-raised after logging and an
                error screenshot (the ``sync_retry`` decorator wraps this call).
        """
        logger.info(f"开始执行 pyautogui 方案,发布活动:{activity.title}")
        self.screenshot("start")
        steps = self._get_publish_steps(activity)
        total = len(steps)
        for number, step in enumerate(steps, start=1):
            logger.info(f"执行步骤 {number}/{total}: {step['description']}")
            try:
                step['action']()
                self.screenshot(f"step_{number}")
                self._wait(step.get('wait_after', 1.0))
            except Exception as e:
                logger.error(f"步骤 {number} 失败:{e}")
                self.screenshot(f"error_step_{number}")
                raise
        logger.info("pyautogui 方案执行成功")
        return {"status": "success", "method": "pyautogui"}

    def _get_publish_steps(self, activity: ActivityModel) -> List[Dict]:
        """Build the ordered step list.

        Each step is a dict with a ``description`` (logged), a zero-argument
        ``action`` callable, and ``wait_after`` seconds to sleep afterwards.
        """
        return [
            {
                'description': '点击桌面微信图标',
                # Absolute desktop coordinates, not relative to the WeChat window.
                'action': lambda: self.click_at(288, 162),
                'wait_after': 4.0
            },
            {
                'description': '点击左侧小程序图标',
                'action': lambda: self.click_at(*self._window_point(0.04, 0.22)),
                'wait_after': 2.0
            },
            {
                'description': '点击一见星球小程序',
                'action': lambda: self.click_at(*self._window_point(0.35, 0.25)),
                'wait_after': 3.0
            },
            {
                'description': '点击发布活动按钮',
                'action': lambda: self.click_at(*self._window_point(0.5, 0.12)),
                'wait_after': 2.0
            },
            {
                'description': '输入活动标题',
                'action': lambda: self._input_title(activity.title),
                'wait_after': 1.0
            },
            {
                'description': '输入活动内容',
                'action': lambda: self._input_content(activity.content),
                'wait_after': 1.0
            },
            {
                'description': '点击提交按钮',
                'action': lambda: self._click_submit(),
                'wait_after': 2.0
            },
        ]

    def _input_title(self, title: str):
        """Focus the title field and type the activity title."""
        # Click the title input box.
        self.click_at(*self._window_point(0.3, 0.25))
        self._wait(0.5)
        self._input_text(title)
        logger.info(f"已输入标题:{title}")

    def _input_content(self, content: str):
        """Focus the content field and type the activity content."""
        # Click the content input box.
        self.click_at(*self._window_point(0.3, 0.4))
        self._wait(0.5)
        self._input_text(content)
        logger.info(f"已输入内容:{content[:30]}...")

    def _click_submit(self):
        """Click the submit button."""
        self.click_at(*self._window_point(0.7, 0.8))
        logger.info("已点击提交按钮")

View File

@@ -0,0 +1,197 @@
import os
import json
import base64
import asyncio
import subprocess
import time
from pathlib import Path
from typing import Dict, Any, Optional
import pyautogui
import requests
from wechat_auto.config import settings
from wechat_auto.utils.logger import logger
from wechat_auto.models.activity import ActivityModel
class QwenAIExecutor:
    """Publish an activity through a screenshot -> Qwen-VL -> pyautogui loop.

    On every step a screenshot is sent to the model together with a fixed
    prompt; the model's JSON reply names the next GUI action to perform.
    """

    # Upper bound applied to a model-requested "wait" so a bad reply cannot
    # stall the run indefinitely.
    _MAX_WAIT_SECONDS = 30.0

    def __init__(self, api_key: Optional[str] = None):
        """Resolve the API key and prepare the screenshot directory.

        Args:
            api_key: Explicit key; falls back to the ``DASHSCOPE_API_KEY``
                environment variable, then ``settings.dashscope_api_key``.

        Raises:
            ValueError: If no API key can be resolved.
        """
        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY") or settings.dashscope_api_key
        if not self.api_key:
            raise ValueError("未配置DASHSCOPE_API_KEY")
        self.endpoint = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
        self.model = "qwen-vl-plus"
        self.screenshot_dir = Path(settings.screenshot_dir)
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)
        self.max_steps = 15

    def _screenshot(self) -> str:
        """Capture the screen to a timestamped PNG and return its path.

        ``scrot`` is tried first. If it is missing, times out, exits non-zero
        or leaves no file behind, ``pyautogui.screenshot`` is used instead so
        the caller always gets a file it can read.
        """
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filepath = self.screenshot_dir / f"ai_step_{timestamp}.png"
        try:
            completed = subprocess.run(
                ['scrot', str(filepath)],
                capture_output=True,
                timeout=5
            )
            captured = completed.returncode == 0 and filepath.exists()
        except (OSError, subprocess.TimeoutExpired):
            captured = False
        if not captured:
            pyautogui.screenshot(str(filepath))
        logger.debug(f"AI截图: {filepath}")
        return str(filepath)

    def _encode_image(self, image_path: str) -> str:
        """Return the file at ``image_path`` as a base64 text string."""
        with open(image_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    @staticmethod
    def _parse_reply(content: Any) -> Dict[str, Any]:
        """Turn a model message ``content`` into an instruction dict.

        Accepts either a plain string or a list of ``{"text": ...}`` parts.
        The text is parsed as JSON directly, and failing that, the span from
        the first ``{`` to the last ``}`` is tried (this covers replies
        wrapped in a Markdown code fence or surrounded by prose). Anything
        that does not yield a JSON object becomes a ``continue`` instruction
        carrying the raw text as ``reason``.
        """
        if isinstance(content, list):
            text = "".join(
                part.get("text", "") for part in content if isinstance(part, dict)
            )
        else:
            text = str(content)

        candidates = [text.strip()]
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return {"action": "continue", "reason": text}

    def _call_qwen(self, prompt: str, image_base64: str) -> Dict[str, Any]:
        """Send the prompt and screenshot to the model and return its instruction.

        Args:
            prompt: Task description and reply-format instructions.
            image_base64: Base64-encoded PNG screenshot.

        Returns:
            The parsed instruction dict (see ``_parse_reply``).

        Raises:
            requests.HTTPError: On a non-success HTTP status.
            KeyError, IndexError: If the response body lacks
                ``output.choices[0].message.content``.
        """
        headers = {
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }
        payload = {
            "model": self.model,
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"image": f"data:image/png;base64,{image_base64}"},
                            {"text": prompt}
                        ]
                    }
                ]
            },
            "parameters": {
                "max_tokens": 2000
            }
        }
        response = requests.post(self.endpoint, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        content = result['output']['choices'][0]['message']['content']
        return self._parse_reply(content)

    def _execute_action(self, action: str, params: Dict[str, Any]):
        """Perform one model-requested GUI action.

        Args:
            action: One of ``click``, ``type``, ``press``, ``wait``,
                ``hotkey``, ``scroll``, ``done`` or ``continue``; anything
                else is logged as unknown and ignored.
            params: Action arguments; missing keys fall back to defaults.
        """
        if action == "click":
            x, y = params.get('x', 0), params.get('y', 0)
            pyautogui.click(x, y)
            logger.info(f"AI点击: ({x}, {y})")
        elif action == "type":
            text = params.get('text', '')
            # NOTE(review): pyautogui.write only presses keys it has a
            # keyboard mapping for; non-ASCII text (e.g. Chinese) is likely
            # dropped. Confirm, and consider a clipboard paste instead.
            pyautogui.write(text, interval=0.05)
            logger.info(f"AI输入: {text[:20]}...")
        elif action == "press":
            key = params.get('key', '')
            pyautogui.press(key)
            logger.info(f"AI按键: {key}")
        elif action == "wait":
            # The value comes from the model: coerce it and clamp it so a
            # negative, huge or non-numeric duration cannot crash or stall.
            try:
                seconds = float(params.get('seconds', 1))
            except (TypeError, ValueError):
                seconds = 1.0
            seconds = min(max(seconds, 0.0), self._MAX_WAIT_SECONDS)
            time.sleep(seconds)
            logger.info(f"AI等待: {seconds}")
        elif action == "hotkey":
            keys = params.get('keys', [])
            pyautogui.hotkey(*keys)
            logger.info(f"AI快捷键: {keys}")
        elif action == "scroll":
            clicks = params.get('clicks', 0)
            pyautogui.scroll(clicks)
            logger.info(f"AI滚动: {clicks}")
        elif action == "done":
            logger.info("AI任务完成")
        elif action == "continue":
            logger.info(f"AI继续: {params.get('reason', '无原因')}")
        else:
            logger.warning(f"未知AI动作: {action}")

    async def execute(self, activity: ActivityModel) -> Dict[str, Any]:
        """Run the screenshot/model/action loop for at most ``max_steps`` steps.

        Args:
            activity: The activity to publish; its fields go into the prompt.

        Returns:
            ``{"status": "success", "method": "qwen_ai"}`` once the model
            answers ``done``, otherwise
            ``{"status": "failed", "error": "执行超时"}``.
        """
        logger.info(f"开始执行Qwen AI方案发布活动: {activity.title}")
        prompt = self._build_prompt(activity)
        for step in range(self.max_steps):
            logger.info(f"AI执行步骤 {step + 1}/{self.max_steps}")
            screenshot_path = self._screenshot()
            image_b64 = self._encode_image(screenshot_path)
            try:
                # NOTE(review): _call_qwen uses blocking requests.post, so the
                # event loop is blocked for the duration of the call.
                result = self._call_qwen(prompt, image_b64)
            except Exception as e:
                # A failed call still consumes one step of the budget.
                logger.error(f"调用Qwen API失败: {e}")
                await asyncio.sleep(2)
                continue
            action = result.get('action', 'continue')
            raw_params = result.get('params')
            # Tolerate "params": null or a non-object value from the model.
            params = dict(raw_params) if isinstance(raw_params, dict) else {}
            # "reason" sits at the top level of the reply; expose it to
            # _execute_action, which reads it from params when logging.
            if 'reason' in result:
                params.setdefault('reason', result['reason'])
            self._execute_action(action, params)
            if action == "done":
                logger.info("Qwen AI方案执行成功")
                return {"status": "success", "method": "qwen_ai"}
            await asyncio.sleep(1)
        logger.error("Qwen AI方案执行超时")
        return {"status": "failed", "error": "执行超时"}

    def _build_prompt(self, activity: ActivityModel) -> str:
        """Compose the prompt: task, activity details and the reply schema.

        ``start_time``, ``end_time`` and ``location`` are only included when
        they are truthy.
        """
        prompt = f"""你正在控制一台Linux电脑的微信客户端。请根据当前屏幕内容帮我完成以下任务
任务:在微信小程序中发布一个活动
活动信息:
- 标题:{activity.title}
- 内容:{activity.content}
"""
        if activity.start_time:
            prompt += f"- 开始时间:{activity.start_time}\n"
        if activity.end_time:
            prompt += f"- 结束时间:{activity.end_time}\n"
        if activity.location:
            prompt += f"- 地点:{activity.location}\n"
        prompt += """
请分析当前屏幕输出JSON格式的下一个操作指令
```json
{
"action": "click|type|press|wait|scroll|hotkey|done|continue",
"params": {
"x": 100,
"y": 200,
"text": "要输入的文字",
"key": "enter",
"seconds": 1,
"clicks": -300,
"keys": ["ctrl", "v"]
},
"reason": "操作原因说明"
}
```
注意事项:
1. 点击位置使用绝对坐标
2. 如果任务已完成action设为"done"
3. 如果需要继续下一步action设为"continue"
4. 先找到并点击小程序入口,然后找到目标小程序,点击发布活动按钮,填写表单并提交
"""
        return prompt