#!/usr/bin/env python3
"""
Simple test for OpenAI client chat.completions.create
"""
import os
import httpx
import openai
from dotenv import load_dotenv

load_dotenv()

print("Initializing OpenAI client...")
print(f"Base URL: http://localhost:8488/v1")
print(f"API Key set: {'Yes' if os.getenv('ALI_API_KEY') else 'No'}")

# Initialize client (pointing to FastAPI server from server.py)
client = openai.OpenAI(
    api_key=os.getenv("ALI_API_KEY"),
    base_url="http://localhost:8488/v1",
    timeout=httpx.Timeout(60.0)
)

print("\nTesting chat completion (non-streaming)...")
# try:
#     # Test chat completion (non-streaming first)
#     response = client.chat.completions.create(
#         model="qwen-flash",
#         messages=[
#             {'role':'system', 'content': 'your name is steve'}
#             ,{"role": "user", "content": "Say hello!"}],
#         stream=False,
#         max_tokens=100,
#         temperature=0.7
#     )
    
#     print(f"Response ID: {response.id}")
#     print(f"Model: {response.model}")
#     print(f"Content: {response.choices[0].message.content}")
#     print("\n✓ Non-streaming test successful!")
    
# except Exception as e:
#     print(f"\n✗ Error: {str(e)}")
#     import traceback
#     traceback.print_exc()

print("\nTesting chat completion (streaming)...")
try:
    # Test streaming with same message as non-streaming test
    response = client.chat.completions.create(
        model="qwen-flash",
        messages=[
            {'role':'system', 'content': 'your name is steve'},
            {"role": "user", "content": "Say hello!"}
        ],
        stream=True,
        max_tokens=100,
        temperature=0.7
    )
    
    print("Streaming response:")
    full_content = ""
    chunk_count = 0
    for chunk in response:
        chunk_count += 1
        if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
            if hasattr(chunk.choices[0], 'delta') and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                print(content, end="", flush=True)
                full_content += content
    
    print(f"\n\nTotal chunks received: {chunk_count}")
    print(f"Full content: {repr(full_content)}")
    print(f"Content length: {len(full_content)}")
    print("\n✓ Streaming test successful!")
    
except Exception as e:
    print(f"\n✗ Error: {str(e)}")
    import traceback
    traceback.print_exc()