stream test

2025-11-05 03:00:50 +08:00
parent 304fe0879c
commit 6d481fb9fa
1 changed files with 79 additions and 0 deletions
--- a/fastapi_server/test_openai_client.py
+++ b/fastapi_server/test_openai_client.py
@@ -0,0 +1,79 @@
 #!/usr/bin/env python3
 """
 Simple test for OpenAI client chat.completions.create
 """
 import os
 import httpx
 import openai
 from dotenv import load_dotenv
 load_dotenv()
 # Single source of truth for the endpoint, so the banner below always shows
 # the exact URL the client is configured with.
 BASE_URL = "http://localhost:8488/v1"
 print("Initializing OpenAI client...")
 print(f"Base URL: {BASE_URL}")
 # Read the key once; the same value drives the status line and the client.
 api_key = os.getenv("ALI_API_KEY")
 print(f"API Key set: {'Yes' if api_key else 'No'}")
 # Initialize client (pointing to FastAPI server from server.py)
 client = openai.OpenAI(
    api_key=api_key,
    base_url=BASE_URL,
    timeout=httpx.Timeout(60.0),
 )
 # The non-streaming check is currently disabled (its code is kept below as
 # comments), so the banner reports a skip instead of announcing a test that
 # never runs.
 print("\nSkipping chat completion (non-streaming): test is disabled")
 # To re-enable: uncomment the block below and print
 # "\nTesting chat completion (non-streaming)..." in place of the skip notice.
 # try:
 #     # Test chat completion (non-streaming first)
 #     response = client.chat.completions.create(
 #         model="qwen-flash",
 #         messages=[
 #             {'role':'system', 'content': 'your name is steve'},
 #             {"role": "user", "content": "Say hello!"},
 #         ],
 #         stream=False,
 #         max_tokens=100,
 #         temperature=0.7
 #     )
 #     print(f"Response ID: {response.id}")
 #     print(f"Model: {response.model}")
 #     print(f"Content: {response.choices[0].message.content}")
 #     print("\n✓ Non-streaming test successful!")
 # except Exception as e:
 #     print(f"\n✗ Error: {str(e)}")
 #     import traceback
 #     traceback.print_exc()
 print("\nTesting chat completion (streaming)...")
 try:
    # Same messages as the non-streaming check; with stream=True the result
    # is iterated chunk by chunk below instead of being read as one response.
    stream = client.chat.completions.create(
        model="qwen-flash",
        messages=[
            {"role": "system", "content": "your name is steve"},
            {"role": "user", "content": "Say hello!"},
        ],
        stream=True,
        max_tokens=100,
        temperature=0.7,
    )
    print("Streaming response:")
    pieces = []  # text fragments, joined once after the loop
    chunk_count = 0
    for chunk in stream:
        chunk_count += 1
        # Chunks with no choices, no delta, or no delta text carry nothing to
        # print; they still count toward the chunk total.
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue
        delta = getattr(choices[0], "delta", None)
        content = getattr(delta, "content", None)
        if not content:
            continue
        # Echo each fragment immediately so streaming is visible as it happens.
        print(content, end="", flush=True)
        pieces.append(content)
    full_content = "".join(pieces)
    print(f"\n\nTotal chunks received: {chunk_count}")
    print(f"Full content: {full_content!r}")
    print(f"Content length: {len(full_content)}")
    if full_content:
        print("\n✓ Streaming test successful!")
    else:
        # A stream that delivered no text must not be reported as a pass.
        print("\n✗ Streaming test failed: no content received")
 except Exception as e:
    # Deliberate best-effort handling: the failure is printed with its
    # traceback and is not re-raised from this block.
    print(f"\n✗ Error: {e}")
    import traceback
    traceback.print_exc()