LiteLLM Minor Fixes & Improvements (10/16/2024) (#6265)

* fix(caching_handler.py): handle positional arguments in add cache logic

Fixes https://github.com/BerriAI/litellm/issues/6264
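
A minimal sketch of the idea (hypothetical helper, not the actual `LLMCachingHandler` code): fold positional arguments back into the kwargs dict before the cache key is built, so `completion("gpt-3.5-turbo", messages=...)` and `completion(model="gpt-3.5-turbo", messages=...)` hash to the same key.

```python
import inspect


def _merge_args_into_kwargs(original_function, args: tuple, kwargs: dict) -> dict:
    # Map each positional value onto its parameter name from the wrapped
    # function's signature, without overriding explicit keyword arguments.
    merged = dict(kwargs)
    param_names = list(inspect.signature(original_function).parameters)
    for name, value in zip(param_names, args):
        merged.setdefault(name, value)
    return merged
```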

* feat(litellm_pre_call_utils.py): allow forwarding openai org id to backend client

https://github.com/BerriAI/litellm/issues/6237
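
Rough shape of the forwarding logic, consistent with the tests further down but not the exact proxy code (the real path is also gated by the `forward_openai_org_id` setting documented below): read the `OpenAI-Organization` header case-insensitively and, when present, pass it through as `organization` on the backend call.

```python
from typing import Optional


def get_openai_org_id_from_headers(headers: dict) -> Optional[str]:
    # The header key may arrive in any casing, e.g. "openai-organization".
    for key, value in headers.items():
        if key.lower() == "openai-organization":
            return value
    return None


def add_litellm_data_for_backend_llm_call(headers: dict) -> dict:
    # Attach `organization` only when the client actually sent an org id.
    data: dict = {}
    org_id = get_openai_org_id_from_headers(headers)
    if org_id is not None:
        data["organization"] = org_id
    return data
```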

* docs(configs.md): add 'forward_openai_org_id' to docs

* fix(proxy_server.py): return model info if user_model is set

Fixes https://github.com/BerriAI/litellm/issues/6233
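
Conceptually (a hedged sketch with a hypothetical helper, not the proxy's actual code path): when the proxy is started with a single `--model` and no config-based model list, `/model/info` should fall back to that `user_model` instead of returning an empty list.

```python
from typing import List, Optional


def _resolve_model_list(
    user_model: Optional[str], llm_model_list: Optional[List[dict]]
) -> List[dict]:
    # Prefer the config-defined model list when one exists.
    if llm_model_list:
        return llm_model_list
    # Otherwise fall back to the CLI-provided model so /model/info is not empty.
    if user_model is not None:
        return [{"model_name": user_model, "litellm_params": {"model": user_model}}]
    return []
```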

* fix(hosted_vllm/chat/transformation.py): don't set tools unless non-none
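
Intent of the change as a hedged sketch (`_add_tool_params_if_set` is a hypothetical helper, not the actual transformation method): skip `tools`/`tool_choice` entirely when they are None, which is exactly what `test_hosted_vllm_tool_param` below asserts.

```python
def _add_tool_params_if_set(non_default_params: dict, optional_params: dict) -> dict:
    # Only forward tool parameters when the caller passed real values; an
    # explicit None must not end up in the hosted_vllm request body.
    for param in ("tools", "tool_choice"):
        value = non_default_params.get(param)
        if value is not None:
            optional_params[param] = value
    return optional_params
```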

* fix(openai.py): improve debug log for openai 'str' error

Addresses https://github.com/BerriAI/litellm/issues/6272

* fix(proxy_server.py): fix linting error

* fix(proxy_server.py): fix linting errors

* test: skip WIP test

* docs(openai.md): add docs on passing openai org id from client to openai
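
For reference, a hedged client-side example (assumes a LiteLLM proxy on `http://0.0.0.0:4000` with `forward_openai_org_id` enabled; the key, URL, and org id are placeholders): the OpenAI SDK sends `organization` as the `OpenAI-Organization` header, which the proxy can now forward upstream.

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-1234",                # placeholder proxy virtual key
    base_url="http://0.0.0.0:4000",   # assumed LiteLLM proxy address
    organization="org-example-id",    # sent as the OpenAI-Organization header
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)
```
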
Krish Dholakia · 2024-10-16 22:16:23 -07:00 · committed by GitHub
commit 38a9a106d2 · parent 43878bd2a0
14 changed files with 371 additions and 47 deletions


@@ -732,3 +732,18 @@ def test_drop_nested_params_add_prop_and_strict(provider, model):
    )

    _check_additional_properties(optional_params["tools"])


def test_hosted_vllm_tool_param():
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/6228
    """
    optional_params = get_optional_params(
        model="my-vllm-model",
        custom_llm_provider="hosted_vllm",
        temperature=0.2,
        tools=None,
        tool_choice=None,
    )

    assert "tools" not in optional_params
    assert "tool_choice" not in optional_params


@@ -2298,3 +2298,70 @@ def test_basic_caching_import():
    assert Cache is not None

    print("Cache imported successfully")


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio()
async def test_caching_kwargs_input(sync_mode):
    from litellm import acompletion
    from litellm.caching.caching_handler import LLMCachingHandler
    from litellm.types.utils import (
        Choices,
        EmbeddingResponse,
        Message,
        ModelResponse,
        Usage,
        CompletionTokensDetails,
        PromptTokensDetails,
    )
    from datetime import datetime

    llm_caching_handler = LLMCachingHandler(
        original_function=acompletion, request_kwargs={}, start_time=datetime.now()
    )

    input = {
        "result": ModelResponse(
            id="chatcmpl-AJ119H5XsDnYiZPp5axJ5d7niwqeR",
            choices=[
                Choices(
                    finish_reason="stop",
                    index=0,
                    message=Message(
                        content="Hello! I'm just a computer program, so I don't have feelings, but I'm here to assist you. How can I help you today?",
                        role="assistant",
                        tool_calls=None,
                        function_call=None,
                    ),
                )
            ],
            created=1729095507,
            model="gpt-3.5-turbo-0125",
            object="chat.completion",
            system_fingerprint=None,
            usage=Usage(
                completion_tokens=31,
                prompt_tokens=16,
                total_tokens=47,
                completion_tokens_details=CompletionTokensDetails(
                    audio_tokens=None, reasoning_tokens=0
                ),
                prompt_tokens_details=PromptTokensDetails(
                    audio_tokens=None, cached_tokens=0
                ),
            ),
            service_tier=None,
        ),
        "kwargs": {
            "messages": [{"role": "user", "content": "42HHey, how's it going?"}],
            "caching": True,
            "litellm_call_id": "fae2aa4f-9f75-4f11-8c9c-63ab8d9fae26",
            "preset_cache_key": "2f69f5640d5e0f25315d0e132f1278bb643554d14565d2c61d61564b10ade90f",
        },
        "args": ("gpt-3.5-turbo",),
    }
    if sync_mode is True:
        llm_caching_handler.sync_set_cache(**input)
    else:
        input["original_function"] = acompletion
        await llm_caching_handler.async_set_cache(**input)


@@ -1796,3 +1796,81 @@ async def test_proxy_model_group_info_rerank(prisma_client):
    print(resp)
    models = resp["data"]
    assert models[0].mode == "rerank"


# @pytest.mark.asyncio
# async def test_proxy_team_member_add(prisma_client):
#     """
#     Add 10 people to a team. Confirm all 10 are added.
#     """
#     from litellm.proxy.management_endpoints.team_endpoints import (
#         team_member_add,
#         new_team,
#     )
#     from litellm.proxy._types import TeamMemberAddRequest, Member, NewTeamRequest

#     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
#     try:

#         async def test():
#             await litellm.proxy.proxy_server.prisma_client.connect()
#             from litellm.proxy.proxy_server import user_api_key_cache

#             user_api_key_dict = UserAPIKeyAuth(
#                 user_role=LitellmUserRoles.PROXY_ADMIN,
#                 api_key="sk-1234",
#                 user_id="1234",
#             )

#             new_team()
#             for _ in range(10):
#                 request = TeamMemberAddRequest(
#                     team_id="1234",
#                     member=Member(
#                         user_id="1234",
#                         user_role=LitellmUserRoles.INTERNAL_USER,
#                     ),
#                 )
#                 key = await team_member_add(
#                     request, user_api_key_dict=user_api_key_dict
#                 )
#                 print(key)

#             user_id = key.user_id

#             # check /user/info to verify user_role was set correctly
#             new_user_info = await user_info(
#                 user_id=user_id, user_api_key_dict=user_api_key_dict
#             )
#             new_user_info = new_user_info.user_info
#             print("new_user_info=", new_user_info)
#             assert new_user_info["user_role"] == LitellmUserRoles.INTERNAL_USER
#             assert new_user_info["user_id"] == user_id

#             generated_key = key.key
#             bearer_token = "Bearer " + generated_key

#             assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict

#             value_from_prisma = await prisma_client.get_data(
#                 token=generated_key,
#             )
#             print("token from prisma", value_from_prisma)

#             request = Request(
#                 {
#                     "type": "http",
#                     "route": api_route,
#                     "path": api_route.path,
#                     "headers": [("Authorization", bearer_token)],
#                 }
#             )

#             # use generated key to auth in
#             result = await user_api_key_auth(request=request, api_key=bearer_token)
#             print("result from user auth with new key", result)

#         asyncio.run(test())
#     except Exception as e:
#         pytest.fail(f"An exception occurred - {str(e)}")


@@ -368,3 +368,41 @@ def test_is_request_body_safe_model_enabled(
        error_raised = True

    assert expect_error == error_raised


def test_reading_openai_org_id_from_headers():
    from litellm.proxy.litellm_pre_call_utils import get_openai_org_id_from_headers

    headers = {
        "OpenAI-Organization": "test_org_id",
    }
    org_id = get_openai_org_id_from_headers(headers)
    assert org_id == "test_org_id"


@pytest.mark.parametrize(
    "headers, expected_data",
    [
        ({"OpenAI-Organization": "test_org_id"}, {"organization": "test_org_id"}),
        ({"openai-organization": "test_org_id"}, {"organization": "test_org_id"}),
        ({}, {}),
        (
            {
                "OpenAI-Organization": "test_org_id",
                "Authorization": "Bearer test_token",
            },
            {
                "organization": "test_org_id",
            },
        ),
    ],
)
def test_add_litellm_data_for_backend_llm_call(headers, expected_data):
    import json

    from litellm.proxy.litellm_pre_call_utils import (
        add_litellm_data_for_backend_llm_call,
    )

    data = add_litellm_data_for_backend_llm_call(headers)

    assert json.dumps(data, sort_keys=True) == json.dumps(expected_data, sort_keys=True)