LiteLLM Minor Fixes & Improvements (10/16/2024) (#6265)
* fix(caching_handler.py): handle positional arguments in add cache logic. Fixes https://github.com/BerriAI/litellm/issues/6264
* feat(litellm_pre_call_utils.py): allow forwarding openai org id to backend client. https://github.com/BerriAI/litellm/issues/6237
* docs(configs.md): add 'forward_openai_org_id' to docs
* fix(proxy_server.py): return model info if user_model is set. Fixes https://github.com/BerriAI/litellm/issues/6233
* fix(hosted_vllm/chat/transformation.py): don't set tools unless non-none
* fix(openai.py): improve debug log for openai 'str' error. Addresses https://github.com/BerriAI/litellm/issues/6272
* fix(proxy_server.py): fix linting error
* fix(proxy_server.py): fix linting errors
* test: skip WIP test
* docs(openai.md): add docs on passing openai org id from client to openai
parent 43878bd2a0
commit 38a9a106d2
14 changed files with 371 additions and 47 deletions
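
The org-id forwarding change can be exercised end-to-end from a client. A minimal usage sketch, assuming a LiteLLM proxy listening at http://0.0.0.0:4000 with the new 'forward_openai_org_id' option enabled and "sk-1234" as a proxy key (all placeholders, not values from this commit):

# Usage sketch only: api_key, base_url, and organization are placeholders.
# The organization value is sent as the OpenAI-Organization header, which the
# updated pre-call logic forwards to the upstream OpenAI request.
from openai import OpenAI

client = OpenAI(
    api_key="sk-1234",               # LiteLLM proxy key (placeholder)
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy address (placeholder)
    organization="my-org-id",        # forwarded when forward_openai_org_id is enabled
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)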
@@ -732,3 +732,18 @@ def test_drop_nested_params_add_prop_and_strict(provider, model):
    )

    _check_additional_properties(optional_params["tools"])


def test_hosted_vllm_tool_param():
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/6228
    """
    optional_params = get_optional_params(
        model="my-vllm-model",
        custom_llm_provider="hosted_vllm",
        temperature=0.2,
        tools=None,
        tool_choice=None,
    )
    assert "tools" not in optional_params
    assert "tool_choice" not in optional_params
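
The new test above pins down the hosted_vllm fix: explicitly passing tools=None and tool_choice=None must not end up in the mapped params. A minimal sketch of that guard, using a hypothetical map_tool_params helper rather than the actual transformation code:

# Sketch of the "only set when non-None" guard; map_tool_params is hypothetical.
from typing import Any, Dict, Optional


def map_tool_params(
    optional_params: Dict[str, Any],
    tools: Optional[list] = None,
    tool_choice: Optional[Any] = None,
) -> Dict[str, Any]:
    # Drop explicit None values instead of forwarding them to the backend.
    if tools is not None:
        optional_params["tools"] = tools
    if tool_choice is not None:
        optional_params["tool_choice"] = tool_choice
    return optional_params


assert "tools" not in map_tool_params({}, tools=None, tool_choice=None)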
@@ -2298,3 +2298,70 @@ def test_basic_caching_import():

    assert Cache is not None
    print("Cache imported successfully")


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio()
async def test_caching_kwargs_input(sync_mode):
    from litellm import acompletion
    from litellm.caching.caching_handler import LLMCachingHandler
    from litellm.types.utils import (
        Choices,
        EmbeddingResponse,
        Message,
        ModelResponse,
        Usage,
        CompletionTokensDetails,
        PromptTokensDetails,
    )
    from datetime import datetime

    llm_caching_handler = LLMCachingHandler(
        original_function=acompletion, request_kwargs={}, start_time=datetime.now()
    )

    input = {
        "result": ModelResponse(
            id="chatcmpl-AJ119H5XsDnYiZPp5axJ5d7niwqeR",
            choices=[
                Choices(
                    finish_reason="stop",
                    index=0,
                    message=Message(
                        content="Hello! I'm just a computer program, so I don't have feelings, but I'm here to assist you. How can I help you today?",
                        role="assistant",
                        tool_calls=None,
                        function_call=None,
                    ),
                )
            ],
            created=1729095507,
            model="gpt-3.5-turbo-0125",
            object="chat.completion",
            system_fingerprint=None,
            usage=Usage(
                completion_tokens=31,
                prompt_tokens=16,
                total_tokens=47,
                completion_tokens_details=CompletionTokensDetails(
                    audio_tokens=None, reasoning_tokens=0
                ),
                prompt_tokens_details=PromptTokensDetails(
                    audio_tokens=None, cached_tokens=0
                ),
            ),
            service_tier=None,
        ),
        "kwargs": {
            "messages": [{"role": "user", "content": "42HHey, how's it going?"}],
            "caching": True,
            "litellm_call_id": "fae2aa4f-9f75-4f11-8c9c-63ab8d9fae26",
            "preset_cache_key": "2f69f5640d5e0f25315d0e132f1278bb643554d14565d2c61d61564b10ade90f",
        },
        "args": ("gpt-3.5-turbo",),
    }
    if sync_mode is True:
        llm_caching_handler.sync_set_cache(**input)
    else:
        input["original_function"] = acompletion
        await llm_caching_handler.async_set_cache(**input)
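
The test above passes the model positionally via "args", which is exactly the case the caching_handler fix targets: the add-cache logic must see parameters whether they arrive positionally or by keyword. A minimal sketch of one way to fold positional args back into a kwargs-style payload, using a hypothetical get_cache_payload helper rather than LiteLLM's actual internals:

# Sketch only: get_cache_payload and demo are hypothetical names.
import inspect
from typing import Any, Callable, Dict, Tuple


def get_cache_payload(
    original_function: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]
) -> Dict[str, Any]:
    # Bind positional args onto their parameter names so the cache-key logic
    # sees e.g. "model" even when it was passed positionally.
    bound = inspect.signature(original_function).bind_partial(*args, **kwargs)
    return dict(bound.arguments)


def demo(model: str, messages: list, caching: bool = False) -> None:
    pass


# "gpt-3.5-turbo" was passed positionally but still shows up under "model".
payload = get_cache_payload(demo, ("gpt-3.5-turbo",), {"messages": [], "caching": True})
assert payload["model"] == "gpt-3.5-turbo"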
@@ -1796,3 +1796,81 @@ async def test_proxy_model_group_info_rerank(prisma_client):
    print(resp)
    models = resp["data"]
    assert models[0].mode == "rerank"


# @pytest.mark.asyncio
# async def test_proxy_team_member_add(prisma_client):
#     """
#     Add 10 people to a team. Confirm all 10 are added.
#     """
#     from litellm.proxy.management_endpoints.team_endpoints import (
#         team_member_add,
#         new_team,
#     )
#     from litellm.proxy._types import TeamMemberAddRequest, Member, NewTeamRequest

#     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
#     try:

#         async def test():
#             await litellm.proxy.proxy_server.prisma_client.connect()
#             from litellm.proxy.proxy_server import user_api_key_cache

#             user_api_key_dict = UserAPIKeyAuth(
#                 user_role=LitellmUserRoles.PROXY_ADMIN,
#                 api_key="sk-1234",
#                 user_id="1234",
#             )

#             new_team()
#             for _ in range(10):
#                 request = TeamMemberAddRequest(
#                     team_id="1234",
#                     member=Member(
#                         user_id="1234",
#                         user_role=LitellmUserRoles.INTERNAL_USER,
#                     ),
#                 )
#                 key = await team_member_add(
#                     request, user_api_key_dict=user_api_key_dict
#                 )

#             print(key)
#             user_id = key.user_id

#             # check /user/info to verify user_role was set correctly
#             new_user_info = await user_info(
#                 user_id=user_id, user_api_key_dict=user_api_key_dict
#             )
#             new_user_info = new_user_info.user_info
#             print("new_user_info=", new_user_info)
#             assert new_user_info["user_role"] == LitellmUserRoles.INTERNAL_USER
#             assert new_user_info["user_id"] == user_id

#             generated_key = key.key
#             bearer_token = "Bearer " + generated_key

#             assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict

#             value_from_prisma = await prisma_client.get_data(
#                 token=generated_key,
#             )
#             print("token from prisma", value_from_prisma)

#             request = Request(
#                 {
#                     "type": "http",
#                     "route": api_route,
#                     "path": api_route.path,
#                     "headers": [("Authorization", bearer_token)],
#                 }
#             )

#             # use generated key to auth in
#             result = await user_api_key_auth(request=request, api_key=bearer_token)
#             print("result from user auth with new key", result)

#         asyncio.run(test())
#     except Exception as e:
#         pytest.fail(f"An exception occurred - {str(e)}")
@@ -368,3 +368,41 @@ def test_is_request_body_safe_model_enabled(
        error_raised = True

    assert expect_error == error_raised


def test_reading_openai_org_id_from_headers():
    from litellm.proxy.litellm_pre_call_utils import get_openai_org_id_from_headers

    headers = {
        "OpenAI-Organization": "test_org_id",
    }
    org_id = get_openai_org_id_from_headers(headers)
    assert org_id == "test_org_id"


@pytest.mark.parametrize(
    "headers, expected_data",
    [
        ({"OpenAI-Organization": "test_org_id"}, {"organization": "test_org_id"}),
        ({"openai-organization": "test_org_id"}, {"organization": "test_org_id"}),
        ({}, {}),
        (
            {
                "OpenAI-Organization": "test_org_id",
                "Authorization": "Bearer test_token",
            },
            {
                "organization": "test_org_id",
            },
        ),
    ],
)
def test_add_litellm_data_for_backend_llm_call(headers, expected_data):
    import json

    from litellm.proxy.litellm_pre_call_utils import (
        add_litellm_data_for_backend_llm_call,
    )

    data = add_litellm_data_for_backend_llm_call(headers)

    assert json.dumps(data, sort_keys=True) == json.dumps(expected_data, sort_keys=True)
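
The two tests above describe the expected behaviour of the new pre-call helpers: the org id is read case-insensitively from the request headers and, when present, forwarded as "organization" in the data for the backend call, while unrelated headers such as Authorization are ignored. A simplified stand-in that satisfies the parametrized cases (the real functions in litellm.proxy.litellm_pre_call_utils may consider additional configuration):

# Simplified stand-ins, not LiteLLM's actual implementation.
from typing import Dict, Optional


def get_openai_org_id_from_headers(headers: Dict[str, str]) -> Optional[str]:
    # Header names are case-insensitive, so match on the lowercased key.
    for key, value in headers.items():
        if key.lower() == "openai-organization":
            return value
    return None


def add_litellm_data_for_backend_llm_call(headers: Dict[str, str]) -> Dict[str, str]:
    # Only add "organization" when the header is present; other headers are ignored.
    data: Dict[str, str] = {}
    org_id = get_openai_org_id_from_headers(headers)
    if org_id is not None:
        data["organization"] = org_id
    return data


assert add_litellm_data_for_backend_llm_call(
    {"openai-organization": "test_org_id"}
) == {"organization": "test_org_id"}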