first commit
This commit is contained in:
0
core/executor/__init__.py
Normal file
0
core/executor/__init__.py
Normal file
BIN
core/executor/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
core/executor/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
core/executor/__pycache__/pyautogui_executor.cpython-313.pyc
Normal file
BIN
core/executor/__pycache__/pyautogui_executor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
core/executor/__pycache__/qwen_ai_executor.cpython-313.pyc
Normal file
BIN
core/executor/__pycache__/qwen_ai_executor.cpython-313.pyc
Normal file
Binary file not shown.
156
core/executor/pyautogui_executor.py
Normal file
156
core/executor/pyautogui_executor.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import pyautogui
|
||||
import time
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
BASE_DIR = Path(__file__).parent.parent.parent
|
||||
import sys
|
||||
sys.path.insert(0, str(BASE_DIR))
|
||||
|
||||
from wechat_auto.config import settings
|
||||
from wechat_auto.utils.logger import logger
|
||||
from wechat_auto.utils.retry import sync_retry
|
||||
from wechat_auto.models.activity import ActivityModel
|
||||
|
||||
|
||||
class PyAutoGUIExecutor:
    """Publish an activity in the WeChat desktop client by scripted mouse/keyboard.

    All coordinates are absolute screen positions derived from a fixed window
    geometry (measured once via xwininfo), so this executor only works on the
    host/layout it was calibrated for.
    """

    def __init__(self):
        # Safety/pacing knobs come from project settings: FAILSAFE aborts when
        # the mouse is flung to a screen corner, PAUSE throttles every call.
        pyautogui.FAILSAFE = settings.failsafe
        pyautogui.PAUSE = settings.click_pause
        self.screenshot_dir = Path(settings.screenshot_dir)
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)

        # Known WeChat window geometry (from xwininfo) — assumes the window is
        # never moved/resized; TODO confirm still valid on this host.
        self.wechat_window = {
            'x': 877,
            'y': 207,
            'width': 980,
            'height': 710
        }

    def _geometry(self):
        """Return (x, y, width, height) of the WeChat window as a tuple."""
        w = self.wechat_window
        return w['x'], w['y'], w['width'], w['height']

    def click_at(self, x: int, y: int, button: str = 'left'):
        """Click at absolute screen coordinates (x, y) with the given button."""
        pyautogui.click(x, y, button=button)
        logger.info(f"点击坐标:({x}, {y})")

    def screenshot(self, name: Optional[str] = None):
        """Best-effort screen capture into the screenshot directory via scrot.

        Failures are logged but never raised: screenshots are diagnostics only.
        """
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"{name or 'action'}_{timestamp}.png"
        filepath = self.screenshot_dir / filename
        try:
            subprocess.run(['scrot', str(filepath)], capture_output=True, timeout=5)
        except (subprocess.TimeoutExpired, OSError) as e:
            # Narrowed from a bare `except: pass` — still best-effort, but the
            # failure is now visible in the logs and we no longer log a
            # misleading success line below.
            logger.warning(f"截图失败:{e}")
            return
        logger.info(f"截图:{filepath}")

    def _input_text(self, text: str):
        """Type text at the current focus, one character every 50 ms."""
        pyautogui.write(text, interval=0.05)
        logger.info(f"输入文本:{text[:30]}...")

    def _wait(self, seconds: float = 1.0):
        """Block for the given number of seconds (pacing between GUI actions)."""
        time.sleep(seconds)

    @sync_retry(max_retries=2, base_delay=2.0)
    def execute(self, activity: ActivityModel) -> Dict[str, Any]:
        """Run the scripted publish flow for *activity*.

        Re-raises the failing step's exception so sync_retry can re-run the
        whole flow; returns a status dict on success.
        """
        logger.info(f"开始执行 pyautogui 方案,发布活动:{activity.title}")

        self.screenshot("start")

        steps = self._get_publish_steps(activity)

        for i, step in enumerate(steps):
            logger.info(f"执行步骤 {i+1}/{len(steps)}: {step['description']}")
            try:
                step['action']()
                self.screenshot(f"step_{i+1}")
                self._wait(step.get('wait_after', 1.0))
            except Exception as e:
                # Capture the screen before propagating so the failure state
                # can be inspected afterwards.
                logger.error(f"步骤 {i+1} 失败:{e}")
                self.screenshot(f"error_step_{i+1}")
                raise

        logger.info("pyautogui 方案执行成功")
        return {"status": "success", "method": "pyautogui"}

    def _get_publish_steps(self, activity: ActivityModel) -> List[Dict]:
        """Build the ordered click/type script for publishing *activity*.

        Each step is a dict with a human-readable description, a zero-arg
        callable, and an optional post-action delay in seconds.
        """
        wx, wy, ww, wh = self._geometry()

        return [
            {
                'description': '点击桌面微信图标',
                # Desktop icon position is hard-coded — TODO confirm per host.
                'action': lambda: self.click_at(288, 162),
                'wait_after': 4.0
            },
            {
                'description': '点击左侧小程序图标',
                'action': lambda: self.click_at(wx + int(ww * 0.04), wy + int(wh * 0.22)),
                'wait_after': 2.0
            },
            {
                'description': '点击一见星球小程序',
                'action': lambda: self.click_at(wx + int(ww * 0.35), wy + int(wh * 0.25)),
                'wait_after': 3.0
            },
            {
                'description': '点击发布活动按钮',
                'action': lambda: self.click_at(wx + int(ww * 0.5), wy + int(wh * 0.12)),
                'wait_after': 2.0
            },
            {
                'description': '输入活动标题',
                'action': lambda: self._input_title(activity.title),
                'wait_after': 1.0
            },
            {
                'description': '输入活动内容',
                'action': lambda: self._input_content(activity.content),
                'wait_after': 1.0
            },
            {
                'description': '点击提交按钮',
                'action': lambda: self._click_submit(),
                'wait_after': 2.0
            },
        ]

    def _input_title(self, title: str):
        """Focus the title input (relative position) and type the title."""
        wx, wy, ww, wh = self._geometry()

        # 点击标题输入框
        self.click_at(wx + int(ww * 0.3), wy + int(wh * 0.25))
        self._wait(0.5)
        self._input_text(title)
        logger.info(f"已输入标题:{title}")

    def _input_content(self, content: str):
        """Focus the content input (relative position) and type the content."""
        wx, wy, ww, wh = self._geometry()

        # 点击内容输入框
        self.click_at(wx + int(ww * 0.3), wy + int(wh * 0.4))
        self._wait(0.5)
        self._input_text(content)
        logger.info(f"已输入内容:{content[:30]}...")

    def _click_submit(self):
        """Click the submit button (relative position in the window)."""
        wx, wy, ww, wh = self._geometry()

        # 点击提交按钮
        self.click_at(wx + int(ww * 0.7), wy + int(wh * 0.8))
        logger.info("已点击提交按钮")
|
||||
197
core/executor/qwen_ai_executor.py
Normal file
197
core/executor/qwen_ai_executor.py
Normal file
@@ -0,0 +1,197 @@
|
||||
import os
|
||||
import json
|
||||
import base64
|
||||
import asyncio
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
import pyautogui
|
||||
import requests
|
||||
from wechat_auto.config import settings
|
||||
from wechat_auto.utils.logger import logger
|
||||
from wechat_auto.models.activity import ActivityModel
|
||||
|
||||
|
||||
class QwenAIExecutor:
    """Publish an activity by letting Qwen-VL look at screenshots and decide
    the next GUI action (click/type/press/...) in a bounded feedback loop."""

    def __init__(self, api_key: Optional[str] = None):
        # Resolve the API key: explicit argument > environment > settings.
        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY") or settings.dashscope_api_key
        if not self.api_key:
            raise ValueError("未配置DASHSCOPE_API_KEY")

        self.endpoint = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"
        self.model = "qwen-vl-plus"
        self.screenshot_dir = Path(settings.screenshot_dir)
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)
        # Hard cap on screenshot -> decide -> act iterations per execute().
        self.max_steps = 15

    def _screenshot(self) -> str:
        """Capture the screen; prefer scrot, fall back to pyautogui.

        Returns the path of the saved PNG as a string.
        """
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filepath = self.screenshot_dir / f"ai_step_{timestamp}.png"

        try:
            subprocess.run(
                ['scrot', str(filepath)],
                capture_output=True,
                timeout=5
            )
        except FileNotFoundError:
            # scrot is not installed — pyautogui's own grabber still works.
            pyautogui.screenshot(str(filepath))

        logger.debug(f"AI截图: {filepath}")
        return str(filepath)

    def _encode_image(self, image_path: str) -> str:
        """Read a file and return its base64-encoded contents as ASCII text."""
        with open(image_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def _call_qwen(self, prompt: str, image_base64: str) -> Dict[str, Any]:
        """Send one screenshot + prompt to DashScope and parse the reply.

        Raises requests.HTTPError on non-2xx responses. When the reply cannot
        be parsed as JSON, wraps it in a {"action": "continue"} dict so the
        caller's loop keeps going.
        """
        headers = {
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        payload = {
            "model": self.model,
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"image": f"data:image/png;base64,{image_base64}"},
                            {"text": prompt}
                        ]
                    }
                ]
            },
            "parameters": {
                "max_tokens": 2000
            }
        }

        response = requests.post(self.endpoint, headers=headers, json=payload, timeout=60)
        response.raise_for_status()

        result = response.json()
        content = result['output']['choices'][0]['message']['content']
        # DashScope multimodal replies may return content as a list of
        # segments ([{"text": ...}, ...]); flatten to plain text first.
        if isinstance(content, list):
            content = "".join(
                seg.get('text', '') for seg in content if isinstance(seg, dict)
            )

        # The prompt asks the model to answer inside a ```json fence; strip
        # the fence before parsing, otherwise json.loads rejects every
        # well-behaved reply and we degrade to "continue" forever.
        text = content.strip()
        if text.startswith("```"):
            text = text.split("\n", 1)[1] if "\n" in text else ""
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            return {"action": "continue", "reason": content}

    def _execute_action(self, action: str, params: Dict[str, Any]):
        """Dispatch a single model-chosen action to the matching pyautogui call.

        Unknown actions are logged and ignored so a bad reply cannot crash
        the loop.
        """
        if action == "click":
            x, y = params.get('x', 0), params.get('y', 0)
            pyautogui.click(x, y)
            logger.info(f"AI点击: ({x}, {y})")

        elif action == "type":
            text = params.get('text', '')
            pyautogui.write(text, interval=0.05)
            logger.info(f"AI输入: {text[:20]}...")

        elif action == "press":
            key = params.get('key', '')
            pyautogui.press(key)
            logger.info(f"AI按键: {key}")

        elif action == "wait":
            seconds = params.get('seconds', 1)
            time.sleep(seconds)
            logger.info(f"AI等待: {seconds}秒")

        elif action == "hotkey":
            keys = params.get('keys', [])
            pyautogui.hotkey(*keys)
            logger.info(f"AI快捷键: {keys}")

        elif action == "scroll":
            clicks = params.get('clicks', 0)
            pyautogui.scroll(clicks)
            logger.info(f"AI滚动: {clicks}")

        elif action == "done":
            logger.info("AI任务完成")

        elif action == "continue":
            logger.info(f"AI继续: {params.get('reason', '无原因')}")

        else:
            logger.warning(f"未知AI动作: {action}")

    async def execute(self, activity: ActivityModel) -> Dict[str, Any]:
        """Drive the screenshot -> Qwen -> action loop until done or max_steps.

        Returns a status dict; never raises for API failures (they are logged
        and the loop retries on the next iteration).
        """
        logger.info(f"开始执行Qwen AI方案,发布活动: {activity.title}")

        prompt = self._build_prompt(activity)

        for step in range(self.max_steps):
            logger.info(f"AI执行步骤 {step + 1}/{self.max_steps}")

            screenshot_path = self._screenshot()
            image_b64 = self._encode_image(screenshot_path)

            try:
                # requests.post is blocking — run it in a worker thread so the
                # event loop stays responsive during the (up to 60 s) API call.
                result = await asyncio.to_thread(self._call_qwen, prompt, image_b64)
            except Exception as e:
                logger.error(f"调用Qwen API失败: {e}")
                await asyncio.sleep(2)
                continue

            action = result.get('action', 'continue')
            params = result.get('params', {})

            self._execute_action(action, params)

            if action == "done":
                logger.info("Qwen AI方案执行成功")
                return {"status": "success", "method": "qwen_ai"}

            await asyncio.sleep(1)

        logger.error("Qwen AI方案执行超时")
        return {"status": "failed", "error": "执行超时"}

    def _build_prompt(self, activity: ActivityModel) -> str:
        """Assemble the task prompt sent with every screenshot.

        Includes the activity's fields (optional ones only when set) and the
        JSON action schema the model must answer with.
        """
        prompt = f"""你正在控制一台Linux电脑的微信客户端。请根据当前屏幕内容,帮我完成以下任务:

任务:在微信小程序中发布一个活动

活动信息:
- 标题:{activity.title}
- 内容:{activity.content}
"""

        if activity.start_time:
            prompt += f"- 开始时间:{activity.start_time}\n"
        if activity.end_time:
            prompt += f"- 结束时间:{activity.end_time}\n"
        if activity.location:
            prompt += f"- 地点:{activity.location}\n"

        prompt += """
请分析当前屏幕,输出JSON格式的下一个操作指令:

```json
{
    "action": "click|type|press|wait|scroll|hotkey|done|continue",
    "params": {
        "x": 100,
        "y": 200,
        "text": "要输入的文字",
        "key": "enter",
        "seconds": 1,
        "clicks": -300,
        "keys": ["ctrl", "v"]
    },
    "reason": "操作原因说明"
}
```

注意事项:
1. 点击位置使用绝对坐标
2. 如果任务已完成,action设为"done"
3. 如果需要继续下一步,action设为"continue"
4. 先找到并点击小程序入口,然后找到目标小程序,点击发布活动按钮,填写表单并提交
"""
        return prompt
|
||||
Reference in New Issue
Block a user