forked from phoenix/litellm-mirror
82 lines
2.4 KiB
Python
82 lines
2.4 KiB
Python
# import openai, json, time, asyncio
|
|
# client = openai.AsyncOpenAI(
|
|
# api_key="sk-1234",
|
|
# base_url="http://0.0.0.0:8000"
|
|
# )
|
|
|
|
# super_fake_messages = [
|
|
# {
|
|
# "role": "user",
|
|
# "content": f"What's the weather like in San Francisco, Tokyo, and Paris? {time.time()}"
|
|
# },
|
|
# {
|
|
# "content": None,
|
|
# "role": "assistant",
|
|
# "tool_calls": [
|
|
# {
|
|
# "id": "1",
|
|
# "function": {
|
|
# "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
|
|
# "name": "get_current_weather"
|
|
# },
|
|
# "type": "function"
|
|
# },
|
|
# {
|
|
# "id": "2",
|
|
# "function": {
|
|
# "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}",
|
|
# "name": "get_current_weather"
|
|
# },
|
|
# "type": "function"
|
|
# },
|
|
# {
|
|
# "id": "3",
|
|
# "function": {
|
|
# "arguments": "{\"location\": \"Paris\", \"unit\": \"celsius\"}",
|
|
# "name": "get_current_weather"
|
|
# },
|
|
# "type": "function"
|
|
# }
|
|
# ]
|
|
# },
|
|
# {
|
|
# "tool_call_id": "1",
|
|
# "role": "tool",
|
|
# "name": "get_current_weather",
|
|
# "content": "{\"location\": \"San Francisco\", \"temperature\": \"90\", \"unit\": \"celsius\"}"
|
|
# },
|
|
# {
|
|
# "tool_call_id": "2",
|
|
# "role": "tool",
|
|
# "name": "get_current_weather",
|
|
# "content": "{\"location\": \"Tokyo\", \"temperature\": \"30\", \"unit\": \"celsius\"}"
|
|
# },
|
|
# {
|
|
# "tool_call_id": "3",
|
|
# "role": "tool",
|
|
# "name": "get_current_weather",
|
|
# "content": "{\"location\": \"Paris\", \"temperature\": \"50\", \"unit\": \"celsius\"}"
|
|
# }
|
|
# ]
|
|
|
|
# async def chat_completions():
|
|
# super_fake_response = await client.chat.completions.create(
|
|
# model="gpt-3.5-turbo",
|
|
# messages=super_fake_messages,
|
|
# seed=1337,
|
|
# stream=False
|
|
# ) # get a new response from the model where it can see the function response
|
|
# await asyncio.sleep(1)
|
|
# return super_fake_response
|
|
|
|
# async def loadtest_fn(n = 1):
|
|
# global num_task_cancelled_errors, exception_counts
|
|
# start = time.time()
|
|
# tasks = [chat_completions() for _ in range(n)]
|
|
# completions = await asyncio.gather(*tasks)
|
|
# successful_completions = [c for c in completions if c is not None]
|
|
# print(n, time.time() - start, len(successful_completions))
|
|
|
|
# # print(json.dumps(super_fake_response.model_dump(), indent=4))
|
|
|
|
# asyncio.run(loadtest_fn())
|