build(requirements.txt): bump openai dep version

fixes proxies argument
This commit is contained in:
Krrish Dholakia 2024-11-29 21:11:12 -08:00
parent 711a1428f8
commit 5d250ca19a
2 changed files with 117 additions and 117 deletions

View file

@ -1,6 +1,6 @@
# LITELLM PROXY DEPENDENCIES # # LITELLM PROXY DEPENDENCIES #
anyio==4.4.0 # openai + http req. anyio==4.4.0 # openai + http req.
openai==1.54.0 # openai req. openai==1.55.3 # openai req.
fastapi==0.111.0 # server dep fastapi==0.111.0 # server dep
backoff==2.2.1 # server dep backoff==2.2.1 # server dep
pyyaml==6.0.0 # server dep pyyaml==6.0.0 # server dep

View file

@ -1,128 +1,128 @@
#### What this tests #### # #### What this tests ####
# This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk. # # This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
import sys, os, time, inspect, asyncio, traceback # import sys, os, time, inspect, asyncio, traceback
from datetime import datetime # from datetime import datetime
import pytest # import pytest
sys.path.insert(0, os.path.abspath("../..")) # sys.path.insert(0, os.path.abspath("../.."))
import openai, litellm, uuid # import openai, litellm, uuid
from openai import AsyncAzureOpenAI # from openai import AsyncAzureOpenAI
client = AsyncAzureOpenAI( # client = AsyncAzureOpenAI(
api_key=os.getenv("AZURE_API_KEY"), # api_key=os.getenv("AZURE_API_KEY"),
azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore # azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore
api_version=os.getenv("AZURE_API_VERSION"), # api_version=os.getenv("AZURE_API_VERSION"),
) # )
model_list = [ # model_list = [
{ # {
"model_name": "azure-test", # "model_name": "azure-test",
"litellm_params": { # "litellm_params": {
"model": "azure/chatgpt-v-2", # "model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"), # "api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"), # "api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"), # "api_version": os.getenv("AZURE_API_VERSION"),
}, # },
} # }
] # ]
router = litellm.Router(model_list=model_list) # type: ignore # router = litellm.Router(model_list=model_list) # type: ignore
async def _openai_completion(): # async def _openai_completion():
try: # try:
start_time = time.time() # start_time = time.time()
response = await client.chat.completions.create( # response = await client.chat.completions.create(
model="chatgpt-v-2", # model="chatgpt-v-2",
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], # messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
stream=True, # stream=True,
) # )
time_to_first_token = None # time_to_first_token = None
first_token_ts = None # first_token_ts = None
init_chunk = None # init_chunk = None
async for chunk in response: # async for chunk in response:
if ( # if (
time_to_first_token is None # time_to_first_token is None
and len(chunk.choices) > 0 # and len(chunk.choices) > 0
and chunk.choices[0].delta.content is not None # and chunk.choices[0].delta.content is not None
): # ):
first_token_ts = time.time() # first_token_ts = time.time()
time_to_first_token = first_token_ts - start_time # time_to_first_token = first_token_ts - start_time
init_chunk = chunk # init_chunk = chunk
end_time = time.time() # end_time = time.time()
print( # print(
"OpenAI Call: ", # "OpenAI Call: ",
init_chunk, # init_chunk,
start_time, # start_time,
first_token_ts, # first_token_ts,
time_to_first_token, # time_to_first_token,
end_time, # end_time,
) # )
return time_to_first_token # return time_to_first_token
except Exception as e: # except Exception as e:
print(e) # print(e)
return None # return None
async def _router_completion(): # async def _router_completion():
try: # try:
start_time = time.time() # start_time = time.time()
response = await router.acompletion( # response = await router.acompletion(
model="azure-test", # model="azure-test",
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], # messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
stream=True, # stream=True,
) # )
time_to_first_token = None # time_to_first_token = None
first_token_ts = None # first_token_ts = None
init_chunk = None # init_chunk = None
async for chunk in response: # async for chunk in response:
if ( # if (
time_to_first_token is None # time_to_first_token is None
and len(chunk.choices) > 0 # and len(chunk.choices) > 0
and chunk.choices[0].delta.content is not None # and chunk.choices[0].delta.content is not None
): # ):
first_token_ts = time.time() # first_token_ts = time.time()
time_to_first_token = first_token_ts - start_time # time_to_first_token = first_token_ts - start_time
init_chunk = chunk # init_chunk = chunk
end_time = time.time() # end_time = time.time()
print( # print(
"Router Call: ", # "Router Call: ",
init_chunk, # init_chunk,
start_time, # start_time,
first_token_ts, # first_token_ts,
time_to_first_token, # time_to_first_token,
end_time - first_token_ts, # end_time - first_token_ts,
) # )
return time_to_first_token # return time_to_first_token
except Exception as e: # except Exception as e:
print(e) # print(e)
return None # return None
async def test_azure_completion_streaming(): # async def test_azure_completion_streaming():
""" # """
Test azure streaming call - measure on time to first (non-null) token. # Test azure streaming call - measure on time to first (non-null) token.
""" # """
n = 3 # Number of concurrent tasks # n = 3 # Number of concurrent tasks
## OPENAI AVG. TIME # ## OPENAI AVG. TIME
tasks = [_openai_completion() for _ in range(n)] # tasks = [_openai_completion() for _ in range(n)]
chat_completions = await asyncio.gather(*tasks) # chat_completions = await asyncio.gather(*tasks)
successful_completions = [c for c in chat_completions if c is not None] # successful_completions = [c for c in chat_completions if c is not None]
total_time = 0 # total_time = 0
for item in successful_completions: # for item in successful_completions:
total_time += item # total_time += item
avg_openai_time = total_time / 3 # avg_openai_time = total_time / 3
## ROUTER AVG. TIME # ## ROUTER AVG. TIME
tasks = [_router_completion() for _ in range(n)] # tasks = [_router_completion() for _ in range(n)]
chat_completions = await asyncio.gather(*tasks) # chat_completions = await asyncio.gather(*tasks)
successful_completions = [c for c in chat_completions if c is not None] # successful_completions = [c for c in chat_completions if c is not None]
total_time = 0 # total_time = 0
for item in successful_completions: # for item in successful_completions:
total_time += item # total_time += item
avg_router_time = total_time / 3 # avg_router_time = total_time / 3
## COMPARE # ## COMPARE
print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}") # print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
assert avg_router_time < avg_openai_time + 0.5 # assert avg_router_time < avg_openai_time + 0.5
# asyncio.run(test_azure_completion_streaming()) # # asyncio.run(test_azure_completion_streaming())