Files
lang-agent/fastapi_server/test_openai_client.py
2025-12-26 14:51:41 +08:00

127 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""
Test for OpenAI-compatible API against server_openai.py
Instructions:
- Start the OpenAI-compatible server first, e.g.:
    python fastapi_server/server_openai.py --llm_name qwen-plus --llm_provider openai --base_url https://dashscope.aliyuncs.com/compatible-mode/v1
- Or with uvicorn:
    uvicorn fastapi_server.server_openai:app --host 0.0.0.0 --port 8589 --reload
- Point this client at the server you started by setting the OPENAI_BASE_URL
  environment variable (a .env file works too), or by editing the default
  value of BASE_URL below, e.g. http://127.0.0.1:8589/v1.
"""
import os
from dotenv import load_dotenv
from loguru import logger
TAG = __name__
load_dotenv()
try:
    from openai import OpenAI
except ImportError:
    # Only a failed import means the package is missing; any other error
    # propagates untouched instead of being mislabelled as "not found".
    print("openai package not found. Please install it: pip install openai")
    raise
# Base URL of the OpenAI-compatible server under test. The OPENAI_BASE_URL
# environment variable wins when it is set; otherwise the local default is used.
BASE_URL = os.environ.get("OPENAI_BASE_URL", "http://127.0.0.1:8589/v1")

# Server-side setup these tests were written against:
#   llm_name:     "qwen-plus"
#   llm_provider: "openai"
#   base_url:     "https://dashscope.aliyuncs.com/compatible-mode/v1"

# Conversation sent with every chat completion request in this script.
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.",
    },
    {
        "role": "user",
        "content": "use calculator to calculate 1234*5641",
    },
]
def test_streaming():
    """Test streaming chat completion.

    Sends the module-level ``messages`` to the server at ``BASE_URL`` with
    ``stream=True``, echoes every content delta to stdout as it arrives and
    prints the total length once the stream is exhausted.

    Returns:
        str: All content deltas concatenated in arrival order ("" when the
        stream carried no text content).

    Raises:
        Exception: Any error raised while creating or consuming the stream
            is logged and re-raised unchanged.
    """
    print("\n" + "=" * 60)
    print("Testing STREAMING chat completion...")
    print("=" * 60 + "\n")

    client = OpenAI(
        base_url=BASE_URL,
        api_key="test-key",  # Dummy key for testing
    )

    try:
        stream = client.chat.completions.create(
            model="qwen-plus",  # Using qwen-plus as configured
            messages=messages,
            stream=True,
        )

        pieces = []
        for chunk in stream:
            # OpenAI-compatible streams may emit chunks whose `choices` list
            # is empty (for example a usage-only chunk); indexing [0] on
            # those would raise IndexError, so skip them.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is None:
                # Role-only or finish chunks carry no text.
                continue
            pieces.append(content)
            print(content, end="", flush=True)

        full_response = "".join(pieces)

        print("\n\n" + "-" * 60)
        print(f"Full streaming response length: {len(full_response)}")
        print("-" * 60)

        return full_response
    except Exception as e:
        logger.error(f"Streaming test error: {e}")
        raise
def test_non_streaming():
    """Test non-streaming chat completion.

    Sends the module-level ``messages`` to the server at ``BASE_URL`` with
    ``stream=False`` and prints the reply text, its length and the finish
    reason of the first choice.

    Returns:
        str: The text content of the first choice ("" when the message has
        no text content).

    Raises:
        Exception: Any error raised by the request or while reading the
            response is logged and re-raised unchanged.
    """
    print("\n" + "=" * 60)
    print("Testing NON-STREAMING chat completion...")
    print("=" * 60 + "\n")

    client = OpenAI(
        base_url=BASE_URL,
        api_key="test-key",  # Dummy key for testing
    )

    try:
        response = client.chat.completions.create(
            model="qwen-plus",  # Using qwen-plus as configured
            messages=messages,
            stream=False,
        )

        first_choice = response.choices[0]
        # `message.content` is nullable; normalise None to "" so the len()
        # calls here and in the caller do not raise TypeError, and so the
        # return type matches test_streaming().
        content = first_choice.message.content or ""

        print(f"Response: {content}")
        print("\n" + "-" * 60)
        print(f"Full non-streaming response length: {len(content)}")
        print(f"Finish reason: {first_choice.finish_reason}")
        print("-" * 60)

        return content
    except Exception as e:
        logger.error(f"Non-streaming test error: {e}")
        raise
def main():
    """Run the streaming check, then the non-streaming check, and report.

    Prints the base URL in use, executes both checks in order (an exception
    from either one propagates to the caller), and finishes with a summary
    of the two response lengths.
    """
    print(f"\nUsing base_url = {BASE_URL}\n")

    # Streaming goes first, non-streaming second.
    streamed_text = test_streaming()
    plain_text = test_non_streaming()

    rule = "=" * 60
    print("\n" + rule, "SUMMARY", rule, sep="\n")

    streamed_len = len(streamed_text)
    print(f"Streaming response length: {streamed_len}")
    plain_len = len(plain_text)
    print(f"Non-streaming response length: {plain_len}")

    print("\nBoth tests completed successfully!")
# Script entry point: run the checks only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()