LiteLLM Minor Fixes & Improvements (10/08/2024) (#6119)

* refactor(cost_calculator.py): move error log line to debug level - https://github.com/BerriAI/litellm/issues/5683#issuecomment-2398599498

* fix: migrate hidden params to read from the standard logging payload. Fixes https://github.com/BerriAI/litellm/issues/5546#issuecomment-2399994026
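
A minimal sketch of what this migration enables (assumes LiteLLM's documented `CustomLogger` interface; the handler name is hypothetical): a callback reads provider response headers from the standard logging payload rather than from ad-hoc hidden fields:

```python
# Hypothetical sketch: read provider response headers from the
# standard_logging_object passed to success callbacks.
import litellm
from litellm.integrations.custom_logger import CustomLogger


class HeaderLogger(CustomLogger):  # hypothetical handler name
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        payload = kwargs["standard_logging_object"]
        # additional_headers now live under hidden_params
        print(payload["hidden_params"]["additional_headers"])


litellm.callbacks = [HeaderLogger()]
```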

* fix(types/utils.py): mark weight as a litellm param

Fixes https://github.com/BerriAI/litellm/issues/5781
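
A hedged illustration of where `weight` shows up (standard `Router` usage; treating it as the motivating case for this fix is an assumption): `weight` drives weighted load balancing across deployments, so it should be consumed by litellm rather than forwarded to the provider API:

```python
# Sketch: "weight" steers weighted load balancing across deployments.
# Marking it as a litellm param keeps it out of the provider request.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "weight": 2},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo-0125", "weight": 1},
        },
    ]
)
```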

* feat(internal_user_endpoints.py): fix `/user/info` + show the user's max budget as the default max budget

Fixes https://github.com/BerriAI/litellm/issues/6117

* feat: support returning team member budget in `/user/info`

Sets the user's max budget within the team as the max budget shown on the UI

Closes https://github.com/BerriAI/litellm/issues/6117
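
A minimal sketch of querying the endpoint (assumes a local proxy at 0.0.0.0:4000 and an admin key, mirroring the tests in this diff; the user id is hypothetical):

```python
# Sketch mirroring the tests below: query /user/info and read the
# team-member budget surfaced by this change.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/user/info",
    params={"user_id": "my-user-id"},  # hypothetical user id
    headers={"Authorization": "Bearer sk-1234"},
)
info = resp.json()
print(info["teams"][0]["team_memberships"][0]["litellm_budget_table"]["max_budget"])
```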

* fix: optional parameter passing to Replicate (#6067)

Signed-off-by: Mandana Vaziri <mvaziri@us.ibm.com>

* fix(o1_transformation.py): handle o1 temperature=0

o1 doesn't support temperature=0; allow the admin to drop this param
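
A short sketch using LiteLLM's documented `drop_params` setting (the model name is illustrative):

```python
# Sketch: with drop_params enabled, litellm drops temperature=0 for o1
# models instead of sending an unsupported value to the API.
import litellm

litellm.drop_params = True  # allow unsupported params to be dropped

resp = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0,  # dropped for o1 rather than raising an error
)
```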

* test: fix test

---------

Signed-off-by: Mandana Vaziri <mvaziri@us.ibm.com>
Co-authored-by: Mandana Vaziri <mvaziri@us.ibm.com>
Krish Dholakia 2024-10-08 21:57:03 -07:00 committed by GitHub
parent ac6fb0cbef
commit 9695c1af10
21 changed files with 260 additions and 86 deletions


@@ -633,7 +633,7 @@ def test_azure_o1_model_params():

 @pytest.mark.parametrize(
     "temperature, expected_error",
-    [(0.2, True), (1, False)],
+    [(0.2, True), (1, False), (0, True)],
 )
 @pytest.mark.parametrize("provider", ["openai", "azure"])
 def test_o1_model_temperature_params(provider, temperature, expected_error):


@@ -1403,6 +1403,37 @@ def test_logging_standard_payload_failure_call():
     ]["standard_logging_object"]


+@pytest.mark.parametrize("stream", [True, False])
+def test_logging_standard_payload_llm_headers(stream):
+    from litellm.types.utils import StandardLoggingPayload
+
+    # sync completion
+    customHandler = CompletionCustomHandler()
+    litellm.callbacks = [customHandler]
+
+    with patch.object(
+        customHandler, "log_success_event", new=MagicMock()
+    ) as mock_client:
+        resp = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            stream=stream,
+        )
+
+        if stream:
+            for chunk in resp:
+                continue
+
+        time.sleep(2)
+        mock_client.assert_called_once()
+
+        standard_logging_object: StandardLoggingPayload = mock_client.call_args.kwargs[
+            "kwargs"
+        ]["standard_logging_object"]
+
+        print(standard_logging_object["hidden_params"]["additional_headers"])
+
+
 def test_logging_key_masking_gemini():
     customHandler = CompletionCustomHandler()
     litellm.callbacks = [customHandler]


@@ -240,24 +240,24 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
         await litellm.proxy.proxy_server.prisma_client.connect()
         from litellm.proxy.proxy_server import user_api_key_cache

-        request = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
-        key = await new_user(
-            request,
-            user_api_key_dict=UserAPIKeyAuth(
-                user_role=LitellmUserRoles.PROXY_ADMIN,
-                api_key="sk-1234",
-                user_id="1234",
-            ),
-        )
+        user_api_key_dict = UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+            api_key="sk-1234",
+            user_id="1234",
+        )
+        request = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
+        key = await new_user(request, user_api_key_dict=user_api_key_dict)
         print(key)
         user_id = key.user_id

         # check /user/info to verify user_role was set correctly
-        new_user_info = await user_info(user_id=user_id)
+        new_user_info = await user_info(
+            user_id=user_id, user_api_key_dict=user_api_key_dict
+        )
         new_user_info = new_user_info.user_info
         print("new_user_info=", new_user_info)
-        assert new_user_info.user_role == LitellmUserRoles.INTERNAL_USER
-        assert new_user_info.user_id == user_id
+        assert new_user_info["user_role"] == LitellmUserRoles.INTERNAL_USER
+        assert new_user_info["user_id"] == user_id

         generated_key = key.key
         bearer_token = "Bearer " + generated_key


@@ -5,6 +5,35 @@ import asyncio
 import aiohttp
 import time, uuid
 from openai import AsyncOpenAI
+from typing import Optional
+
+
+async def get_user_info(session, get_user, call_user, view_all: Optional[bool] = None):
+    """
+    Make sure only models user has access to are returned
+    """
+    if view_all is True:
+        url = "http://0.0.0.0:4000/user/info"
+    else:
+        url = f"http://0.0.0.0:4000/user/info?user_id={get_user}"
+    headers = {
+        "Authorization": f"Bearer {call_user}",
+        "Content-Type": "application/json",
+    }
+
+    async with session.get(url, headers=headers) as response:
+        status = response.status
+        response_text = await response.text()
+        print(response_text)
+        print()
+
+        if status != 200:
+            if call_user != get_user:
+                return status
+            else:
+                print(f"call_user: {call_user}; get_user: {get_user}")
+                raise Exception(f"Request did not return a 200 status code: {status}")
+        return await response.json()


 async def new_user(
@@ -630,3 +659,13 @@ async def test_users_in_team_budget():
             print("got exception, this is expected")
             print(e)
             assert "Budget has been exceeded" in str(e)
+
+            ## Check user info
+            user_info = await get_user_info(session, get_user, call_user="sk-1234")
+            assert (
+                user_info["teams"][0]["team_memberships"][0]["litellm_budget_table"][
+                    "max_budget"
+                ]
+                == 0.0000001
+            )


@@ -88,9 +88,15 @@ async def test_user_info():
         key_gen = await new_user(session, 0, user_id=get_user)
         key = key_gen["key"]

         ## as admin ##
-        await get_user_info(session=session, get_user=get_user, call_user="sk-1234")
+        resp = await get_user_info(
+            session=session, get_user=get_user, call_user="sk-1234"
+        )
+        assert isinstance(resp["user_info"], dict)
+        assert len(resp["user_info"]) > 0
+
         ## as user themself ##
-        await get_user_info(session=session, get_user=get_user, call_user=key)
+        resp = await get_user_info(session=session, get_user=get_user, call_user=key)
+        assert isinstance(resp["user_info"], dict)
+        assert len(resp["user_info"]) > 0
+
         # as random user #
         key_gen = await new_user(session=session, i=0)
         random_key = key_gen["key"]