From 6d481fb9fa2afa198ee6ed14bcc8b6c00938ef27 Mon Sep 17 00:00:00 2001
From: goulustis
Date: Wed, 5 Nov 2025 03:00:50 +0800
Subject: [PATCH] stream test

---
Review notes (not part of the commit message):
* The script body now lives in main() behind a __main__ guard, so importing
  this test_*.py module performs no network calls.
* A failed check makes the script exit with status 1 instead of 0.
* The previously commented-out non-streaming check is kept as
  run_non_streaming(), disabled by default via RUN_NON_STREAMING.
* The blob id on the "index" line is carried over from the original patch
  and does not reflect the revised file content.

 fastapi_server/test_openai_client.py | 108 +++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 fastapi_server/test_openai_client.py

diff --git a/fastapi_server/test_openai_client.py b/fastapi_server/test_openai_client.py
new file mode 100644
index 0000000..a2d345e
--- /dev/null
+++ b/fastapi_server/test_openai_client.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Manual smoke test for OpenAI client chat.completions.create.
+
+Sends one chat request to the FastAPI server at BASE_URL and prints the
+reply. All work happens in main(), so importing this module (for example
+when a test runner collects test_*.py files) makes no network calls.
+"""
+import os
+import sys
+import traceback
+
+import httpx
+import openai
+from dotenv import load_dotenv
+
+BASE_URL = "http://localhost:8488/v1"
+MODEL = "qwen-flash"
+MESSAGES = [
+    {"role": "system", "content": "your name is steve"},
+    {"role": "user", "content": "Say hello!"},
+]
+# The non-streaming check was commented out when this script was added, so it
+# stays off by default; set to True to run it before the streaming check.
+RUN_NON_STREAMING = False
+
+
+def make_client():
+    """Return a client pointed at the FastAPI server from server.py."""
+    return openai.OpenAI(
+        api_key=os.getenv("ALI_API_KEY"),
+        base_url=BASE_URL,
+        timeout=httpx.Timeout(60.0),
+    )
+
+
+def run_non_streaming(client):
+    """Request one complete response and print its id, model and content."""
+    response = client.chat.completions.create(
+        model=MODEL,
+        messages=MESSAGES,
+        stream=False,
+        max_tokens=100,
+        temperature=0.7,
+    )
+    print(f"Response ID: {response.id}")
+    print(f"Model: {response.model}")
+    print(f"Content: {response.choices[0].message.content}")
+    print("\n✓ Non-streaming test successful!")
+
+
+def run_streaming(client):
+    """Stream one response, echo text as it arrives, then print totals."""
+    stream = client.chat.completions.create(
+        model=MODEL,
+        messages=MESSAGES,
+        stream=True,
+        max_tokens=100,
+        temperature=0.7,
+    )
+
+    print("Streaming response:")
+    pieces = []
+    chunk_count = 0  # stays 0 if the stream yields nothing
+    for chunk_count, chunk in enumerate(stream, start=1):
+        # Every chunk is counted; only chunks with delta text are printed.
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            continue
+        delta = getattr(choices[0], "delta", None)
+        text = getattr(delta, "content", None)
+        if text:
+            print(text, end="", flush=True)
+            pieces.append(text)
+
+    full_content = "".join(pieces)
+    print(f"\n\nTotal chunks received: {chunk_count}")
+    print(f"Full content: {full_content!r}")
+    print(f"Content length: {len(full_content)}")
+    print("\n✓ Streaming test successful!")
+
+
+def main():
+    """Run the enabled checks; return 0 if all pass, 1 otherwise."""
+    load_dotenv()
+
+    print("Initializing OpenAI client...")
+    print(f"Base URL: {BASE_URL}")
+    print(f"API Key set: {'Yes' if os.getenv('ALI_API_KEY') else 'No'}")
+    client = make_client()
+
+    checks = [("streaming", run_streaming)]
+    if RUN_NON_STREAMING:
+        checks.insert(0, ("non-streaming", run_non_streaming))
+
+    exit_code = 0
+    for label, check in checks:
+        print(f"\nTesting chat completion ({label})...")
+        try:
+            check(client)
+        except Exception as e:  # script boundary: report, then fail the run
+            print(f"\n✗ Error: {e}")
+            traceback.print_exc()
+            exit_code = 1
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())