forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (11/29/2024) (#6965)
* fix(factory.py): ensure tool call converts image url. Fixes https://github.com/BerriAI/litellm/issues/6953
* fix(transformation.py): support mp4 + pdf urls for vertex ai. Fixes https://github.com/BerriAI/litellm/issues/6936
* fix(http_handler.py): mask gemini api key in error logs. Fixes https://github.com/BerriAI/litellm/issues/6963
* docs(prometheus.md): update prometheus FAQs
* feat(auth_checks.py): ensure specific model access takes precedence over wildcard model access; if the wildcard model is in an access group but the specific model is not, deny access (see the sketch below)
* fix(auth_checks.py): handle auth checks for team-based model access groups; handles the scenario where a model access group is used for wildcard models
* fix(internal_user_endpoints.py): support adding guardrails on `/user/update`. Fixes https://github.com/BerriAI/litellm/issues/6942
* fix(key_management_endpoints.py): fix prepare_metadata_fields helper
* fix: fix tests
* build(requirements.txt): bump openai dep version; fixes proxies argument
* test: fix tests
* fix(http_handler.py): fix error message masking
* fix(bedrock_guardrails.py): pass in prepped data
* test: fix test
* test: fix nvidia nim test
* fix(http_handler.py): return original response headers
* fix: revert maskedhttpstatuserror
* test: update tests
* test: cleanup test
* fix(key_management_endpoints.py): fix metadata field update logic
* fix(key_management_endpoints.py): maintain initial order of guardrails in key update
* fix(key_management_endpoints.py): handle prepare metadata
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting errors
* fix: fix key management errors
* fix(key_management_endpoints.py): update metadata
* test: update test
* refactor: add more debug statements
* test: skip flaky test
* test: fix test
* fix: fix test
* fix: fix update metadata logic
* fix: fix test
* ci(config.yml): change db url for e2e ui testing
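The feat(auth_checks.py) change is the most behavior-visible item in the list above, and the new test_can_key_call_model test further down exercises it. A minimal, self-contained sketch of the precedence rule it describes follows; the helper name, data shapes, and matching logic here are illustrative assumptions, not LiteLLM's actual auth_checks implementation.

# Illustrative sketch only: hypothetical helper mirroring the rule
# "specific model access > wildcard model access" from the commit message.
from fnmatch import fnmatch


def key_can_call_model(requested_model: str,
                       key_access_groups: set[str],
                       deployments: list[dict]) -> bool:
    """A deployment registered under the exact model name takes precedence over
    wildcard ('provider/*') deployments when deciding access."""
    specific = [d for d in deployments if d["model_name"] == requested_model]
    wildcard = [d for d in deployments
                if "*" in d["model_name"] and fnmatch(requested_model, d["model_name"])]

    # If an exact-name deployment exists, only its access groups count;
    # a wildcard match alone is not enough.
    candidates = specific or wildcard
    return any(key_access_groups & set(d.get("access_groups", [])) for d in candidates)


# Mirrors the test data below: the key only holds the wildcard group, so the
# specifically registered "openai/gpt-4o" (private group) is denied.
deployments = [
    {"model_name": "openai/*", "access_groups": ["public-openai-models"]},
    {"model_name": "openai/gpt-4o", "access_groups": ["private-openai-models"]},
]
assert key_can_call_model("openai/gpt-4o-mini", {"public-openai-models"}, deployments)
assert not key_can_call_model("openai/gpt-4o", {"public-openai-models"}, deployments)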
parent: bd59f18809
commit: 859b47f08b
37 changed files with 1040 additions and 714 deletions
@@ -95,3 +95,107 @@ async def test_handle_failed_db_connection():
     print("_handle_failed_db_connection_for_get_key_object got exception", exc_info)

     assert str(exc_info.value) == "Failed to connect to DB"
+
+
+@pytest.mark.parametrize(
+    "model, expect_to_work",
+    [("openai/gpt-4o-mini", True), ("openai/gpt-4o", False)],
+)
+@pytest.mark.asyncio
+async def test_can_key_call_model(model, expect_to_work):
+    """
+    If wildcard model + specific model is used, choose the specific model settings
+    """
+    from litellm.proxy.auth.auth_checks import can_key_call_model
+    from fastapi import HTTPException
+
+    llm_model_list = [
+        {
+            "model_name": "openai/*",
+            "litellm_params": {
+                "model": "openai/*",
+                "api_key": "test-api-key",
+            },
+            "model_info": {
+                "id": "e6e7006f83029df40ebc02ddd068890253f4cd3092bcb203d3d8e6f6f606f30f",
+                "db_model": False,
+                "access_groups": ["public-openai-models"],
+            },
+        },
+        {
+            "model_name": "openai/gpt-4o",
+            "litellm_params": {
+                "model": "openai/gpt-4o",
+                "api_key": "test-api-key",
+            },
+            "model_info": {
+                "id": "0cfcd87f2cb12a783a466888d05c6c89df66db23e01cecd75ec0b83aed73c9ad",
+                "db_model": False,
+                "access_groups": ["private-openai-models"],
+            },
+        },
+    ]
+    router = litellm.Router(model_list=llm_model_list)
+    args = {
+        "model": model,
+        "llm_model_list": llm_model_list,
+        "valid_token": UserAPIKeyAuth(
+            models=["public-openai-models"],
+        ),
+        "llm_router": router,
+    }
+    if expect_to_work:
+        await can_key_call_model(**args)
+    else:
+        with pytest.raises(Exception) as e:
+            await can_key_call_model(**args)
+
+        print(e)
+
+
+@pytest.mark.parametrize(
+    "model, expect_to_work",
+    [("openai/gpt-4o", False), ("openai/gpt-4o-mini", True)],
+)
+@pytest.mark.asyncio
+async def test_can_team_call_model(model, expect_to_work):
+    from litellm.proxy.auth.auth_checks import model_in_access_group
+    from fastapi import HTTPException
+
+    llm_model_list = [
+        {
+            "model_name": "openai/*",
+            "litellm_params": {
+                "model": "openai/*",
+                "api_key": "test-api-key",
+            },
+            "model_info": {
+                "id": "e6e7006f83029df40ebc02ddd068890253f4cd3092bcb203d3d8e6f6f606f30f",
+                "db_model": False,
+                "access_groups": ["public-openai-models"],
+            },
+        },
+        {
+            "model_name": "openai/gpt-4o",
+            "litellm_params": {
+                "model": "openai/gpt-4o",
+                "api_key": "test-api-key",
+            },
+            "model_info": {
+                "id": "0cfcd87f2cb12a783a466888d05c6c89df66db23e01cecd75ec0b83aed73c9ad",
+                "db_model": False,
+                "access_groups": ["private-openai-models"],
+            },
+        },
+    ]
+    router = litellm.Router(model_list=llm_model_list)
+
+    args = {
+        "model": model,
+        "team_models": ["public-openai-models"],
+        "llm_router": router,
+    }
+    if expect_to_work:
+        assert model_in_access_group(**args)
+    else:
+        assert not model_in_access_group(**args)
@@ -33,7 +33,7 @@ from litellm.router import Router
 @pytest.mark.asyncio()
 @pytest.mark.respx()
-async def test_azure_tenant_id_auth(respx_mock: MockRouter):
+async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
     """

     Tests when we set tenant_id, client_id, client_secret they don't get sent with the request

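For context, the renamed test covers Azure AD (Entra ID) authentication on a router deployment. The snippet below is a hedged sketch of such a configuration, assuming the tenant_id / client_id / client_secret parameter names quoted in the test's docstring; the endpoint, deployment name, and environment variables are placeholders.

# Hedged sketch: an Azure deployment authenticated with Entra ID credentials
# instead of an api_key; per the test's docstring, these credentials are used
# locally and are not forwarded with the outgoing request. Values are placeholders.
import os
import litellm

router = litellm.Router(
    model_list=[
        {
            "model_name": "azure-gpt",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "tenant_id": os.getenv("AZURE_TENANT_ID"),
                "client_id": os.getenv("AZURE_CLIENT_ID"),
                "client_secret": os.getenv("AZURE_CLIENT_SECRET"),
            },
        }
    ]
)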
@@ -1,128 +1,128 @@
-#### What this tests ####
-# This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
-import sys, os, time, inspect, asyncio, traceback
-from datetime import datetime
-import pytest
+# #### What this tests ####
+# # This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
+# import sys, os, time, inspect, asyncio, traceback
+# from datetime import datetime
+# import pytest

-sys.path.insert(0, os.path.abspath("../.."))
-import openai, litellm, uuid
-from openai import AsyncAzureOpenAI
+# sys.path.insert(0, os.path.abspath("../.."))
+# import openai, litellm, uuid
+# from openai import AsyncAzureOpenAI

-client = AsyncAzureOpenAI(
-    api_key=os.getenv("AZURE_API_KEY"),
-    azure_endpoint=os.getenv("AZURE_API_BASE"),  # type: ignore
-    api_version=os.getenv("AZURE_API_VERSION"),
-)
+# client = AsyncAzureOpenAI(
+#     api_key=os.getenv("AZURE_API_KEY"),
+#     azure_endpoint=os.getenv("AZURE_API_BASE"),  # type: ignore
+#     api_version=os.getenv("AZURE_API_VERSION"),
+# )

-model_list = [
-    {
-        "model_name": "azure-test",
-        "litellm_params": {
-            "model": "azure/chatgpt-v-2",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_base": os.getenv("AZURE_API_BASE"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-        },
-    }
-]
+# model_list = [
+#     {
+#         "model_name": "azure-test",
+#         "litellm_params": {
+#             "model": "azure/chatgpt-v-2",
+#             "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_base": os.getenv("AZURE_API_BASE"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
+#         },
+#     }
+# ]

-router = litellm.Router(model_list=model_list)  # type: ignore
+# router = litellm.Router(model_list=model_list)  # type: ignore


-async def _openai_completion():
-    try:
-        start_time = time.time()
-        response = await client.chat.completions.create(
-            model="chatgpt-v-2",
-            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
-            stream=True,
-        )
-        time_to_first_token = None
-        first_token_ts = None
-        init_chunk = None
-        async for chunk in response:
-            if (
-                time_to_first_token is None
-                and len(chunk.choices) > 0
-                and chunk.choices[0].delta.content is not None
-            ):
-                first_token_ts = time.time()
-                time_to_first_token = first_token_ts - start_time
-                init_chunk = chunk
-        end_time = time.time()
-        print(
-            "OpenAI Call: ",
-            init_chunk,
-            start_time,
-            first_token_ts,
-            time_to_first_token,
-            end_time,
-        )
-        return time_to_first_token
-    except Exception as e:
-        print(e)
-        return None
+# async def _openai_completion():
+#     try:
+#         start_time = time.time()
+#         response = await client.chat.completions.create(
+#             model="chatgpt-v-2",
+#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
+#             stream=True,
+#         )
+#         time_to_first_token = None
+#         first_token_ts = None
+#         init_chunk = None
+#         async for chunk in response:
+#             if (
+#                 time_to_first_token is None
+#                 and len(chunk.choices) > 0
+#                 and chunk.choices[0].delta.content is not None
+#             ):
+#                 first_token_ts = time.time()
+#                 time_to_first_token = first_token_ts - start_time
+#                 init_chunk = chunk
+#         end_time = time.time()
+#         print(
+#             "OpenAI Call: ",
+#             init_chunk,
+#             start_time,
+#             first_token_ts,
+#             time_to_first_token,
+#             end_time,
+#         )
+#         return time_to_first_token
+#     except Exception as e:
+#         print(e)
+#         return None


-async def _router_completion():
-    try:
-        start_time = time.time()
-        response = await router.acompletion(
-            model="azure-test",
-            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
-            stream=True,
-        )
-        time_to_first_token = None
-        first_token_ts = None
-        init_chunk = None
-        async for chunk in response:
-            if (
-                time_to_first_token is None
-                and len(chunk.choices) > 0
-                and chunk.choices[0].delta.content is not None
-            ):
-                first_token_ts = time.time()
-                time_to_first_token = first_token_ts - start_time
-                init_chunk = chunk
-        end_time = time.time()
-        print(
-            "Router Call: ",
-            init_chunk,
-            start_time,
-            first_token_ts,
-            time_to_first_token,
-            end_time - first_token_ts,
-        )
-        return time_to_first_token
-    except Exception as e:
-        print(e)
-        return None
+# async def _router_completion():
+#     try:
+#         start_time = time.time()
+#         response = await router.acompletion(
+#             model="azure-test",
+#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
+#             stream=True,
+#         )
+#         time_to_first_token = None
+#         first_token_ts = None
+#         init_chunk = None
+#         async for chunk in response:
+#             if (
+#                 time_to_first_token is None
+#                 and len(chunk.choices) > 0
+#                 and chunk.choices[0].delta.content is not None
+#             ):
+#                 first_token_ts = time.time()
+#                 time_to_first_token = first_token_ts - start_time
+#                 init_chunk = chunk
+#         end_time = time.time()
+#         print(
+#             "Router Call: ",
+#             init_chunk,
+#             start_time,
+#             first_token_ts,
+#             time_to_first_token,
+#             end_time - first_token_ts,
+#         )
+#         return time_to_first_token
+#     except Exception as e:
+#         print(e)
+#         return None


-async def test_azure_completion_streaming():
-    """
-    Test azure streaming call - measure on time to first (non-null) token.
-    """
-    n = 3  # Number of concurrent tasks
-    ## OPENAI AVG. TIME
-    tasks = [_openai_completion() for _ in range(n)]
-    chat_completions = await asyncio.gather(*tasks)
-    successful_completions = [c for c in chat_completions if c is not None]
-    total_time = 0
-    for item in successful_completions:
-        total_time += item
-    avg_openai_time = total_time / 3
-    ## ROUTER AVG. TIME
-    tasks = [_router_completion() for _ in range(n)]
-    chat_completions = await asyncio.gather(*tasks)
-    successful_completions = [c for c in chat_completions if c is not None]
-    total_time = 0
-    for item in successful_completions:
-        total_time += item
-    avg_router_time = total_time / 3
-    ## COMPARE
-    print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
-    assert avg_router_time < avg_openai_time + 0.5
+# async def test_azure_completion_streaming():
+#     """
+#     Test azure streaming call - measure on time to first (non-null) token.
+#     """
+#     n = 3  # Number of concurrent tasks
+#     ## OPENAI AVG. TIME
+#     tasks = [_openai_completion() for _ in range(n)]
+#     chat_completions = await asyncio.gather(*tasks)
+#     successful_completions = [c for c in chat_completions if c is not None]
+#     total_time = 0
+#     for item in successful_completions:
+#         total_time += item
+#     avg_openai_time = total_time / 3
+#     ## ROUTER AVG. TIME
+#     tasks = [_router_completion() for _ in range(n)]
+#     chat_completions = await asyncio.gather(*tasks)
+#     successful_completions = [c for c in chat_completions if c is not None]
+#     total_time = 0
+#     for item in successful_completions:
+#         total_time += item
+#     avg_router_time = total_time / 3
+#     ## COMPARE
+#     print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
+#     assert avg_router_time < avg_openai_time + 0.5


-# asyncio.run(test_azure_completion_streaming())
+# # asyncio.run(test_azure_completion_streaming())
@@ -1146,7 +1146,9 @@ async def test_exception_with_headers_httpx(
     except litellm.RateLimitError as e:
         exception_raised = True
-        assert e.litellm_response_headers is not None
+        assert (
+            e.litellm_response_headers is not None
+        ), "litellm_response_headers is None"
         print("e.litellm_response_headers", e.litellm_response_headers)
         assert int(e.litellm_response_headers["retry-after"]) == cooldown_time

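Because the strengthened assertion guarantees litellm_response_headers is populated on a RateLimitError, callers can honor the provider's retry-after hint. The helper below is an illustrative sketch (not part of this PR), assuming a plain litellm.acompletion call site.

# Hedged usage sketch: back off using the retry-after header that the test above
# asserts is exposed via e.litellm_response_headers on a RateLimitError.
import asyncio
import litellm


async def completion_with_backoff(**kwargs):
    try:
        return await litellm.acompletion(**kwargs)
    except litellm.RateLimitError as e:
        headers = getattr(e, "litellm_response_headers", None) or {}
        # fall back to a fixed pause if the provider did not send retry-after
        wait_s = int(headers.get("retry-after", 5))
        await asyncio.sleep(wait_s)
        return await litellm.acompletion(**kwargs)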