build(requirements.txt): bump openai dep version

fixes proxies argument
2024-11-29 21:11:12 -08:00 · 2024-11-29 21:11:12 -08:00 · 5d250ca19a
commit 5d250ca19a
parent 711a1428f8
2 changed files with 117 additions and 117 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,6 @@
 # LITELLM PROXY DEPENDENCIES #
 anyio==4.4.0 # openai + http req.
-openai==1.54.0  # openai req. 
+openai==1.55.3  # openai req. 
 fastapi==0.111.0 # server dep
 backoff==2.2.1 # server dep
 pyyaml==6.0.0 # server dep
--- a/tests/local_testing/test_azure_perf.py
+++ b/tests/local_testing/test_azure_perf.py
@ -1,128 +1,128 @@
-#### What this tests ####
+# #### What this tests ####
-#    This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
+# #    This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
-import sys, os, time, inspect, asyncio, traceback
+# import sys, os, time, inspect, asyncio, traceback
-from datetime import datetime
+# from datetime import datetime
-import pytest
+# import pytest
-sys.path.insert(0, os.path.abspath("../.."))
+# sys.path.insert(0, os.path.abspath("../.."))
-import openai, litellm, uuid
+# import openai, litellm, uuid
-from openai import AsyncAzureOpenAI
+# from openai import AsyncAzureOpenAI
-client = AsyncAzureOpenAI(
+# client = AsyncAzureOpenAI(
-    api_key=os.getenv("AZURE_API_KEY"),
+#     api_key=os.getenv("AZURE_API_KEY"),
-    azure_endpoint=os.getenv("AZURE_API_BASE"),  # type: ignore
+#     azure_endpoint=os.getenv("AZURE_API_BASE"),  # type: ignore
-    api_version=os.getenv("AZURE_API_VERSION"),
+#     api_version=os.getenv("AZURE_API_VERSION"),
-)
+# )
-model_list = [
+# model_list = [
-    {
+#     {
-        "model_name": "azure-test",
+#         "model_name": "azure-test",
-        "litellm_params": {
+#         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+#             "model": "azure/chatgpt-v-2",
-            "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_key": os.getenv("AZURE_API_KEY"),
-            "api_base": os.getenv("AZURE_API_BASE"),
+#             "api_base": os.getenv("AZURE_API_BASE"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
-        },
+#         },
-    }
+#     }
-]
+# ]
-router = litellm.Router(model_list=model_list)  # type: ignore
+# router = litellm.Router(model_list=model_list)  # type: ignore
-async def _openai_completion():
+# async def _openai_completion():
-    try:
+#     try:
-        start_time = time.time()
+#         start_time = time.time()
-        response = await client.chat.completions.create(
+#         response = await client.chat.completions.create(
-            model="chatgpt-v-2",
+#             model="chatgpt-v-2",
-            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
+#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
-            stream=True,
+#             stream=True,
-        )
+#         )
-        time_to_first_token = None
+#         time_to_first_token = None
-        first_token_ts = None
+#         first_token_ts = None
-        init_chunk = None
+#         init_chunk = None
-        async for chunk in response:
+#         async for chunk in response:
-            if (
+#             if (
-                time_to_first_token is None
+#                 time_to_first_token is None
-                and len(chunk.choices) > 0
+#                 and len(chunk.choices) > 0
-                and chunk.choices[0].delta.content is not None
+#                 and chunk.choices[0].delta.content is not None
-            ):
+#             ):
-                first_token_ts = time.time()
+#                 first_token_ts = time.time()
-                time_to_first_token = first_token_ts - start_time
+#                 time_to_first_token = first_token_ts - start_time
-                init_chunk = chunk
+#                 init_chunk = chunk
-        end_time = time.time()
+#         end_time = time.time()
-        print(
+#         print(
-            "OpenAI Call: ",
+#             "OpenAI Call: ",
-            init_chunk,
+#             init_chunk,
-            start_time,
+#             start_time,
-            first_token_ts,
+#             first_token_ts,
-            time_to_first_token,
+#             time_to_first_token,
-            end_time,
+#             end_time,
-        )
+#         )
-        return time_to_first_token
+#         return time_to_first_token
-    except Exception as e:
+#     except Exception as e:
-        print(e)
+#         print(e)
-        return None
+#         return None
-async def _router_completion():
+# async def _router_completion():
-    try:
+#     try:
-        start_time = time.time()
+#         start_time = time.time()
-        response = await router.acompletion(
+#         response = await router.acompletion(
-            model="azure-test",
+#             model="azure-test",
-            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
+#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
-            stream=True,
+#             stream=True,
-        )
+#         )
-        time_to_first_token = None
+#         time_to_first_token = None
-        first_token_ts = None
+#         first_token_ts = None
-        init_chunk = None
+#         init_chunk = None
-        async for chunk in response:
+#         async for chunk in response:
-            if (
+#             if (
-                time_to_first_token is None
+#                 time_to_first_token is None
-                and len(chunk.choices) > 0
+#                 and len(chunk.choices) > 0
-                and chunk.choices[0].delta.content is not None
+#                 and chunk.choices[0].delta.content is not None
-            ):
+#             ):
-                first_token_ts = time.time()
+#                 first_token_ts = time.time()
-                time_to_first_token = first_token_ts - start_time
+#                 time_to_first_token = first_token_ts - start_time
-                init_chunk = chunk
+#                 init_chunk = chunk
-        end_time = time.time()
+#         end_time = time.time()
-        print(
+#         print(
-            "Router Call: ",
+#             "Router Call: ",
-            init_chunk,
+#             init_chunk,
-            start_time,
+#             start_time,
-            first_token_ts,
+#             first_token_ts,
-            time_to_first_token,
+#             time_to_first_token,
-            end_time - first_token_ts,
+#             end_time - first_token_ts,
-        )
+#         )
-        return time_to_first_token
+#         return time_to_first_token
-    except Exception as e:
+#     except Exception as e:
-        print(e)
+#         print(e)
-        return None
+#         return None
-async def test_azure_completion_streaming():
+# async def test_azure_completion_streaming():
-    """
+#     """
-    Test azure streaming call - measure on time to first (non-null) token.
+#     Test azure streaming call - measure on time to first (non-null) token.
-    """
+#     """
-    n = 3  # Number of concurrent tasks
+#     n = 3  # Number of concurrent tasks
-    ## OPENAI AVG. TIME
+#     ## OPENAI AVG. TIME
-    tasks = [_openai_completion() for _ in range(n)]
+#     tasks = [_openai_completion() for _ in range(n)]
-    chat_completions = await asyncio.gather(*tasks)
+#     chat_completions = await asyncio.gather(*tasks)
-    successful_completions = [c for c in chat_completions if c is not None]
+#     successful_completions = [c for c in chat_completions if c is not None]
-    total_time = 0
+#     total_time = 0
-    for item in successful_completions:
+#     for item in successful_completions:
-        total_time += item
+#         total_time += item
-    avg_openai_time = total_time / 3
+#     avg_openai_time = total_time / 3
-    ## ROUTER AVG. TIME
+#     ## ROUTER AVG. TIME
-    tasks = [_router_completion() for _ in range(n)]
+#     tasks = [_router_completion() for _ in range(n)]
-    chat_completions = await asyncio.gather(*tasks)
+#     chat_completions = await asyncio.gather(*tasks)
-    successful_completions = [c for c in chat_completions if c is not None]
+#     successful_completions = [c for c in chat_completions if c is not None]
-    total_time = 0
+#     total_time = 0
-    for item in successful_completions:
+#     for item in successful_completions:
-        total_time += item
+#         total_time += item
-    avg_router_time = total_time / 3
+#     avg_router_time = total_time / 3
-    ## COMPARE
+#     ## COMPARE
-    print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
+#     print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
-    assert avg_router_time < avg_openai_time + 0.5
+#     assert avg_router_time < avg_openai_time + 0.5
-# asyncio.run(test_azure_completion_streaming())
+# # asyncio.run(test_azure_completion_streaming())