forked from phoenix/litellm-mirror
build(requirements.txt): bump openai dep version
fixes proxies argument
This commit is contained in:
parent
711a1428f8
commit
5d250ca19a
2 changed files with 117 additions and 117 deletions
|
@ -1,6 +1,6 @@
|
||||||
# LITELLM PROXY DEPENDENCIES #
|
# LITELLM PROXY DEPENDENCIES #
|
||||||
anyio==4.4.0 # openai + http req.
|
anyio==4.4.0 # openai + http req.
|
||||||
openai==1.54.0 # openai req.
|
openai==1.55.3 # openai req.
|
||||||
fastapi==0.111.0 # server dep
|
fastapi==0.111.0 # server dep
|
||||||
backoff==2.2.1 # server dep
|
backoff==2.2.1 # server dep
|
||||||
pyyaml==6.0.0 # server dep
|
pyyaml==6.0.0 # server dep
|
||||||
|
|
|
@ -1,128 +1,128 @@
|
||||||
#### What this tests ####
|
# #### What this tests ####
|
||||||
# This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
|
# # This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
|
||||||
import sys, os, time, inspect, asyncio, traceback
|
# import sys, os, time, inspect, asyncio, traceback
|
||||||
from datetime import datetime
|
# from datetime import datetime
|
||||||
import pytest
|
# import pytest
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
# sys.path.insert(0, os.path.abspath("../.."))
|
||||||
import openai, litellm, uuid
|
# import openai, litellm, uuid
|
||||||
from openai import AsyncAzureOpenAI
|
# from openai import AsyncAzureOpenAI
|
||||||
|
|
||||||
client = AsyncAzureOpenAI(
|
# client = AsyncAzureOpenAI(
|
||||||
api_key=os.getenv("AZURE_API_KEY"),
|
# api_key=os.getenv("AZURE_API_KEY"),
|
||||||
azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore
|
# azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore
|
||||||
api_version=os.getenv("AZURE_API_VERSION"),
|
# api_version=os.getenv("AZURE_API_VERSION"),
|
||||||
)
|
# )
|
||||||
|
|
||||||
model_list = [
|
# model_list = [
|
||||||
{
|
# {
|
||||||
"model_name": "azure-test",
|
# "model_name": "azure-test",
|
||||||
"litellm_params": {
|
# "litellm_params": {
|
||||||
"model": "azure/chatgpt-v-2",
|
# "model": "azure/chatgpt-v-2",
|
||||||
"api_key": os.getenv("AZURE_API_KEY"),
|
# "api_key": os.getenv("AZURE_API_KEY"),
|
||||||
"api_base": os.getenv("AZURE_API_BASE"),
|
# "api_base": os.getenv("AZURE_API_BASE"),
|
||||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
# "api_version": os.getenv("AZURE_API_VERSION"),
|
||||||
},
|
# },
|
||||||
}
|
# }
|
||||||
]
|
# ]
|
||||||
|
|
||||||
router = litellm.Router(model_list=model_list) # type: ignore
|
# router = litellm.Router(model_list=model_list) # type: ignore
|
||||||
|
|
||||||
|
|
||||||
async def _openai_completion():
|
# async def _openai_completion():
|
||||||
try:
|
# try:
|
||||||
start_time = time.time()
|
# start_time = time.time()
|
||||||
response = await client.chat.completions.create(
|
# response = await client.chat.completions.create(
|
||||||
model="chatgpt-v-2",
|
# model="chatgpt-v-2",
|
||||||
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||||
stream=True,
|
# stream=True,
|
||||||
)
|
# )
|
||||||
time_to_first_token = None
|
# time_to_first_token = None
|
||||||
first_token_ts = None
|
# first_token_ts = None
|
||||||
init_chunk = None
|
# init_chunk = None
|
||||||
async for chunk in response:
|
# async for chunk in response:
|
||||||
if (
|
# if (
|
||||||
time_to_first_token is None
|
# time_to_first_token is None
|
||||||
and len(chunk.choices) > 0
|
# and len(chunk.choices) > 0
|
||||||
and chunk.choices[0].delta.content is not None
|
# and chunk.choices[0].delta.content is not None
|
||||||
):
|
# ):
|
||||||
first_token_ts = time.time()
|
# first_token_ts = time.time()
|
||||||
time_to_first_token = first_token_ts - start_time
|
# time_to_first_token = first_token_ts - start_time
|
||||||
init_chunk = chunk
|
# init_chunk = chunk
|
||||||
end_time = time.time()
|
# end_time = time.time()
|
||||||
print(
|
# print(
|
||||||
"OpenAI Call: ",
|
# "OpenAI Call: ",
|
||||||
init_chunk,
|
# init_chunk,
|
||||||
start_time,
|
# start_time,
|
||||||
first_token_ts,
|
# first_token_ts,
|
||||||
time_to_first_token,
|
# time_to_first_token,
|
||||||
end_time,
|
# end_time,
|
||||||
)
|
# )
|
||||||
return time_to_first_token
|
# return time_to_first_token
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
print(e)
|
# print(e)
|
||||||
return None
|
# return None
|
||||||
|
|
||||||
|
|
||||||
async def _router_completion():
|
# async def _router_completion():
|
||||||
try:
|
# try:
|
||||||
start_time = time.time()
|
# start_time = time.time()
|
||||||
response = await router.acompletion(
|
# response = await router.acompletion(
|
||||||
model="azure-test",
|
# model="azure-test",
|
||||||
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||||
stream=True,
|
# stream=True,
|
||||||
)
|
# )
|
||||||
time_to_first_token = None
|
# time_to_first_token = None
|
||||||
first_token_ts = None
|
# first_token_ts = None
|
||||||
init_chunk = None
|
# init_chunk = None
|
||||||
async for chunk in response:
|
# async for chunk in response:
|
||||||
if (
|
# if (
|
||||||
time_to_first_token is None
|
# time_to_first_token is None
|
||||||
and len(chunk.choices) > 0
|
# and len(chunk.choices) > 0
|
||||||
and chunk.choices[0].delta.content is not None
|
# and chunk.choices[0].delta.content is not None
|
||||||
):
|
# ):
|
||||||
first_token_ts = time.time()
|
# first_token_ts = time.time()
|
||||||
time_to_first_token = first_token_ts - start_time
|
# time_to_first_token = first_token_ts - start_time
|
||||||
init_chunk = chunk
|
# init_chunk = chunk
|
||||||
end_time = time.time()
|
# end_time = time.time()
|
||||||
print(
|
# print(
|
||||||
"Router Call: ",
|
# "Router Call: ",
|
||||||
init_chunk,
|
# init_chunk,
|
||||||
start_time,
|
# start_time,
|
||||||
first_token_ts,
|
# first_token_ts,
|
||||||
time_to_first_token,
|
# time_to_first_token,
|
||||||
end_time - first_token_ts,
|
# end_time - first_token_ts,
|
||||||
)
|
# )
|
||||||
return time_to_first_token
|
# return time_to_first_token
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
print(e)
|
# print(e)
|
||||||
return None
|
# return None
|
||||||
|
|
||||||
|
|
||||||
async def test_azure_completion_streaming():
|
# async def test_azure_completion_streaming():
|
||||||
"""
|
# """
|
||||||
Test azure streaming call - measure on time to first (non-null) token.
|
# Test azure streaming call - measure on time to first (non-null) token.
|
||||||
"""
|
# """
|
||||||
n = 3 # Number of concurrent tasks
|
# n = 3 # Number of concurrent tasks
|
||||||
## OPENAI AVG. TIME
|
# ## OPENAI AVG. TIME
|
||||||
tasks = [_openai_completion() for _ in range(n)]
|
# tasks = [_openai_completion() for _ in range(n)]
|
||||||
chat_completions = await asyncio.gather(*tasks)
|
# chat_completions = await asyncio.gather(*tasks)
|
||||||
successful_completions = [c for c in chat_completions if c is not None]
|
# successful_completions = [c for c in chat_completions if c is not None]
|
||||||
total_time = 0
|
# total_time = 0
|
||||||
for item in successful_completions:
|
# for item in successful_completions:
|
||||||
total_time += item
|
# total_time += item
|
||||||
avg_openai_time = total_time / 3
|
# avg_openai_time = total_time / 3
|
||||||
## ROUTER AVG. TIME
|
# ## ROUTER AVG. TIME
|
||||||
tasks = [_router_completion() for _ in range(n)]
|
# tasks = [_router_completion() for _ in range(n)]
|
||||||
chat_completions = await asyncio.gather(*tasks)
|
# chat_completions = await asyncio.gather(*tasks)
|
||||||
successful_completions = [c for c in chat_completions if c is not None]
|
# successful_completions = [c for c in chat_completions if c is not None]
|
||||||
total_time = 0
|
# total_time = 0
|
||||||
for item in successful_completions:
|
# for item in successful_completions:
|
||||||
total_time += item
|
# total_time += item
|
||||||
avg_router_time = total_time / 3
|
# avg_router_time = total_time / 3
|
||||||
## COMPARE
|
# ## COMPARE
|
||||||
print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
|
# print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
|
||||||
assert avg_router_time < avg_openai_time + 0.5
|
# assert avg_router_time < avg_openai_time + 0.5
|
||||||
|
|
||||||
|
|
||||||
# asyncio.run(test_azure_completion_streaming())
|
# # asyncio.run(test_azure_completion_streaming())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue