stream test

2025-11-05 03:00:50 +08:00
parent 304fe0879c
commit 6d481fb9fa
1 changed files with 79 additions and 0 deletions
--- a/fastapi_server/test_openai_client.py
+++ b/fastapi_server/test_openai_client.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""
+Simple test for OpenAI client chat.completions.create
+"""
+import os
+import httpx
+import openai
+from dotenv import load_dotenv
+
+# Populate the process environment via python-dotenv before ALI_API_KEY is read.
+load_dotenv()
+
+# Single source of truth for the endpoint, so the URL that is printed and the
+# URL the client actually talks to cannot drift apart.
+BASE_URL = "http://localhost:8488/v1"
+
+# Read the key once; only its presence is reported, never its value.
+api_key = os.getenv("ALI_API_KEY")
+
+print("Initializing OpenAI client...")
+print(f"Base URL: {BASE_URL}")
+print(f"API Key set: {'Yes' if api_key else 'No'}")
+
+# Initialize client (pointing to FastAPI server from server.py)
+client = openai.OpenAI(
+    api_key=api_key,
+    base_url=BASE_URL,
+    timeout=httpx.Timeout(60.0),
+)
+
+# The non-streaming check is disabled by default so the script focuses on the
+# streaming path; set this flag to True to run it as well.
+RUN_NON_STREAMING_TEST = False
+
+if RUN_NON_STREAMING_TEST:
+    print("\nTesting chat completion (non-streaming)...")
+    try:
+        # Test chat completion (non-streaming first)
+        response = client.chat.completions.create(
+            model="qwen-flash",
+            messages=[
+                {"role": "system", "content": "your name is steve"},
+                {"role": "user", "content": "Say hello!"},
+            ],
+            stream=False,
+            max_tokens=100,
+            temperature=0.7,
+        )
+
+        print(f"Response ID: {response.id}")
+        print(f"Model: {response.model}")
+        print(f"Content: {response.choices[0].message.content}")
+        print("\n✓ Non-streaming test successful!")
+
+    except Exception as e:
+        # Top-level boundary of a manual test script: report the failure with
+        # a full traceback instead of aborting the remaining checks.
+        print(f"\n✗ Error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+else:
+    # Say so explicitly rather than announcing a test that never runs.
+    print("\nSkipping chat completion (non-streaming) test (disabled).")
+
+print("\nTesting chat completion (streaming)...")
+try:
+    # Same prompt as the non-streaming test; with stream=True the call returns
+    # an iterable of incremental chunks instead of a single response object.
+    response = client.chat.completions.create(
+        model="qwen-flash",
+        messages=[
+            {"role": "system", "content": "your name is steve"},
+            {"role": "user", "content": "Say hello!"},
+        ],
+        stream=True,
+        max_tokens=100,
+        temperature=0.7,
+    )
+
+    print("Streaming response:")
+    pieces = []  # collected text fragments, joined once after the loop
+    chunk_count = 0  # stays 0 if the stream yields nothing at all
+    for chunk_count, chunk in enumerate(response, start=1):
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            # Chunks without choices carry no text to display.
+            continue
+        # getattr with a default tolerates a missing or None delta, which a
+        # bare hasattr() check would let through to an AttributeError.
+        delta = getattr(choices[0], "delta", None)
+        content = getattr(delta, "content", None)
+        if content:
+            # Flush per fragment so the text appears as it arrives.
+            print(content, end="", flush=True)
+            pieces.append(content)
+
+    full_content = "".join(pieces)
+    print(f"\n\nTotal chunks received: {chunk_count}")
+    print(f"Full content: {repr(full_content)}")
+    print(f"Content length: {len(full_content)}")
+    if full_content:
+        print("\n✓ Streaming test successful!")
+    else:
+        # A stream that delivered no text is not a passing streaming test.
+        print("\n✗ Streaming test failed: no content received")
+
+except Exception as e:
+    # Top-level boundary of a manual test script: report the failure with a
+    # full traceback for debugging.
+    print(f"\n✗ Error: {str(e)}")
+    import traceback
+    traceback.print_exc()