litellm/tests/local_testing/test_proxy_server_spend.py

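# Load test for the litellm proxy's spend tracking (kept commented out):
# replays a canned tool-calling conversation against a locally running
# proxy and fires n concurrent chat completions.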
# import openai, json, time, asyncio
# client = openai.AsyncOpenAI(
#     api_key="sk-1234",
#     base_url="http://0.0.0.0:8000",
# )

# super_fake_messages = [
#     {
#         "role": "user",
#         "content": f"What's the weather like in San Francisco, Tokyo, and Paris? {time.time()}",
#     },
#     {
#         "content": None,
#         "role": "assistant",
#         "tool_calls": [
#             {
#                 "id": "1",
#                 "function": {
#                     "arguments": '{"location": "San Francisco", "unit": "celsius"}',
#                     "name": "get_current_weather",
#                 },
#                 "type": "function",
#             },
#             {
#                 "id": "2",
#                 "function": {
#                     "arguments": '{"location": "Tokyo", "unit": "celsius"}',
#                     "name": "get_current_weather",
#                 },
#                 "type": "function",
#             },
#             {
#                 "id": "3",
#                 "function": {
#                     "arguments": '{"location": "Paris", "unit": "celsius"}',
#                     "name": "get_current_weather",
#                 },
#                 "type": "function",
#             },
#         ],
#     },
#     {
#         "tool_call_id": "1",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": '{"location": "San Francisco", "temperature": "90", "unit": "celsius"}',
#     },
#     {
#         "tool_call_id": "2",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": '{"location": "Tokyo", "temperature": "30", "unit": "celsius"}',
#     },
#     {
#         "tool_call_id": "3",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": '{"location": "Paris", "temperature": "50", "unit": "celsius"}',
#     },
# ]
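
# Send the canned conversation to the proxy and return the model's reply.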
# async def chat_completions():
#     super_fake_response = await client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=super_fake_messages,
#         seed=1337,
#         stream=False,
#     )  # get a new response from the model where it can see the function response
#     await asyncio.sleep(1)
#     return super_fake_response
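
# Fire n concurrent chat completions and report the elapsed wall-clock time
# and the number of successful responses.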
# async def loadtest_fn(n=1):
#     start = time.time()
#     tasks = [chat_completions() for _ in range(n)]
#     responses = await asyncio.gather(*tasks)
#     successful_completions = [r for r in responses if r is not None]
#     print(n, time.time() - start, len(successful_completions))
# # print(json.dumps(super_fake_response.model_dump(), indent=4))
# asyncio.run(loadtest_fn())
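# # To push more load through the proxy, run with a larger batch, e.g.:
# # asyncio.run(loadtest_fn(n=100))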