From 38a9a106d2ba9bacfffd45e81c3f88b45bdfc65f Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 16 Oct 2024 22:16:23 -0700 Subject: [PATCH] LiteLLM Minor Fixes & Improvements (10/16/2024) (#6265) * fix(caching_handler.py): handle positional arguments in add cache logic Fixes https://github.com/BerriAI/litellm/issues/6264 * feat(litellm_pre_call_utils.py): allow forwarding openai org id to backend client https://github.com/BerriAI/litellm/issues/6237 * docs(configs.md): add 'forward_openai_org_id' to docs * fix(proxy_server.py): return model info if user_model is set Fixes https://github.com/BerriAI/litellm/issues/6233 * fix(hosted_vllm/chat/transformation.py): don't set tools unless non-none * fix(openai.py): improve debug log for openai 'str' error Addresses https://github.com/BerriAI/litellm/issues/6272 * fix(proxy_server.py): fix linting error * fix(proxy_server.py): fix linting errors * test: skip WIP test * docs(openai.md): add docs on passing openai org id from client to openai --- docs/my-website/docs/providers/openai.md | 47 ++++++++++- docs/my-website/docs/proxy/configs.md | 3 + litellm/caching/caching_handler.py | 52 +++++++++---- litellm/llms/OpenAI/openai.py | 10 ++- .../llms/hosted_vllm/chat/transformation.py | 3 +- litellm/proxy/_new_secret_config.yaml | 27 +------ litellm/proxy/_types.py | 5 ++ litellm/proxy/litellm_pre_call_utils.py | 40 +++++++++- litellm/proxy/proxy_server.py | 29 ++++++- litellm/utils.py | 4 +- tests/llm_translation/test_optional_params.py | 15 ++++ tests/local_testing/test_caching.py | 67 ++++++++++++++++ tests/local_testing/test_proxy_server.py | 78 +++++++++++++++++++ tests/local_testing/test_proxy_utils.py | 38 +++++++++ 14 files changed, 371 insertions(+), 47 deletions(-) diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md index 44ba9d196..15661f652 100644 --- a/docs/my-website/docs/providers/openai.md +++ b/docs/my-website/docs/providers/openai.md @@ -492,4 +492,49 @@ response = completion("openai/your-model-name", messages) If you need to set api_base dynamically, just pass it in completions instead - `completions(...,api_base="your-proxy-api-base")` -For more check out [setting API Base/Keys](../set_keys.md) \ No newline at end of file +For more check out [setting API Base/Keys](../set_keys.md) + +### Forwarding Org ID for Proxy requests + +Forward the client's OpenAI organization ID to OpenAI with the `forward_openai_org_id` param. + +1. Set up config.yaml + +```yaml +model_list: + - model_name: "gpt-3.5-turbo" + litellm_params: + model: gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +general_settings: + forward_openai_org_id: true # 👈 KEY CHANGE +``` + +2. Start Proxy + +```bash +litellm --config config.yaml --detailed_debug + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Make an OpenAI call + +```python +from openai import OpenAI +client = OpenAI( + api_key="sk-1234", + organization="my-special-org", + base_url="http://0.0.0.0:4000" +) + +client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}]) +``` + +In your logs, you should see the forwarded organization ID: + +```bash +LiteLLM:DEBUG: utils.py:255 - Request to litellm: +LiteLLM:DEBUG: utils.py:255 - litellm.acompletion(... 
organization='my-special-org',) +``` \ No newline at end of file diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index 1dcb95b6f..11ba0427b 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -811,6 +811,8 @@ general_settings: | oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings | | pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) | | enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication | +| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). | + ### router_settings - Reference ```yaml @@ -859,6 +861,7 @@ router_settings: | allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) | | allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) | + ### environment variables - Reference | Name | Description | diff --git a/litellm/caching/caching_handler.py b/litellm/caching/caching_handler.py index b110cfeed..771c319d7 100644 --- a/litellm/caching/caching_handler.py +++ b/litellm/caching/caching_handler.py @@ -16,6 +16,7 @@ In each method it will call the appropriate method from caching.py import asyncio import datetime +import inspect import threading from typing import ( TYPE_CHECKING, @@ -632,7 +633,7 @@ class LLMCachingHandler: logging_obj=logging_obj, ) - async def _async_set_cache( + async def async_set_cache( self, result: Any, original_function: Callable, @@ -653,7 +654,7 @@ class LLMCachingHandler: Raises: None """ - args = args or () + kwargs.update(convert_args_to_kwargs(result, original_function, kwargs, args)) if litellm.cache is None: return # [OPTIONAL] ADD TO CACHE @@ -675,24 +676,24 @@ class LLMCachingHandler: ) # s3 doesn't support bulk writing. Exclude. 
): asyncio.create_task( - litellm.cache.async_add_cache_pipeline(result, *args, **kwargs) + litellm.cache.async_add_cache_pipeline(result, **kwargs) ) elif isinstance(litellm.cache.cache, S3Cache): threading.Thread( target=litellm.cache.add_cache, - args=(result,) + args, + args=(result,), kwargs=kwargs, ).start() else: asyncio.create_task( - litellm.cache.async_add_cache(result.json(), *args, **kwargs) + litellm.cache.async_add_cache( + result.model_dump_json(), **kwargs + ) ) else: - asyncio.create_task( - litellm.cache.async_add_cache(result, *args, **kwargs) - ) + asyncio.create_task(litellm.cache.async_add_cache(result, **kwargs)) - def _sync_set_cache( + def sync_set_cache( self, result: Any, kwargs: Dict[str, Any], @@ -701,14 +702,16 @@ class LLMCachingHandler: """ Sync internal method to add the result to the cache """ + kwargs.update( + convert_args_to_kwargs(result, self.original_function, kwargs, args) + ) if litellm.cache is None: return - args = args or () if self._should_store_result_in_cache( original_function=self.original_function, kwargs=kwargs ): - litellm.cache.add_cache(result, *args, **kwargs) + litellm.cache.add_cache(result, **kwargs) return @@ -772,7 +775,7 @@ class LLMCachingHandler: # if a complete_streaming_response is assembled, add it to the cache if complete_streaming_response is not None: - await self._async_set_cache( + await self.async_set_cache( result=complete_streaming_response, original_function=self.original_function, kwargs=self.request_kwargs, @@ -795,7 +798,7 @@ class LLMCachingHandler: # if a complete_streaming_response is assembled, add it to the cache if complete_streaming_response is not None: - self._sync_set_cache( + self.sync_set_cache( result=complete_streaming_response, kwargs=self.request_kwargs, ) @@ -849,3 +852,26 @@ class LLMCachingHandler: additional_args=None, stream=kwargs.get("stream", False), ) + + +def convert_args_to_kwargs( + result: Any, + original_function: Callable, + kwargs: Dict[str, Any], + args: Optional[Tuple[Any, ...]] = None, +) -> Dict[str, Any]: + # Get the signature of the original function + signature = inspect.signature(original_function) + + # Get parameter names in the order they appear in the original function + param_names = list(signature.parameters.keys()) + + # Create a mapping of positional arguments to parameter names + args_to_kwargs = {} + if args: + for index, arg in enumerate(args): + if index < len(param_names): + param_name = param_names[index] + args_to_kwargs[param_name] = arg + + return args_to_kwargs diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index 3c60ac06a..704dcf304 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -590,6 +590,7 @@ class OpenAIChatCompletion(BaseLLM): - call chat.completions.create.with_raw_response when litellm.return_response_headers is True - call chat.completions.create by default """ + raw_response = None try: raw_response = openai_client.chat.completions.with_raw_response.create( **data, timeout=timeout @@ -602,7 +603,14 @@ class OpenAIChatCompletion(BaseLLM): response = raw_response.parse() return headers, response except Exception as e: - raise e + if raw_response is not None: + raise Exception( + "error - {}, Received response - {}, Type of response - {}".format( + e, raw_response, type(raw_response) + ) + ) + else: + raise e def completion( # type: ignore self, diff --git a/litellm/llms/hosted_vllm/chat/transformation.py b/litellm/llms/hosted_vllm/chat/transformation.py index 0b1259dbf..c83304ffd 
100644 --- a/litellm/llms/hosted_vllm/chat/transformation.py +++ b/litellm/llms/hosted_vllm/chat/transformation.py @@ -28,7 +28,8 @@ class HostedVLLMChatConfig(OpenAIGPTConfig): _tools = _remove_additional_properties(_tools) # remove 'strict' from tools _tools = _remove_strict_from_schema(_tools) - non_default_params["tools"] = _tools + if _tools is not None: + non_default_params["tools"] = _tools return super().map_openai_params( non_default_params, optional_params, model, drop_params ) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 5f847c04c..90c9fc3d9 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,33 +1,12 @@ model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: azure/gpt-35-turbo # 👈 EU azure model - api_base: https://my-endpoint-europe-berri-992.openai.azure.com/ - api_key: os.environ/AZURE_EUROPE_API_KEY - region_name: "eu" - - model_name: gpt-4o - litellm_params: - model: azure/gpt-4o - api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ - api_key: os.environ/AZURE_API_KEY - region_name: "us" - - model_name: gpt-3.5-turbo-end-user-test + - model_name: "gpt-3.5-turbo" litellm_params: model: gpt-3.5-turbo - region_name: "eu" - model_info: - id: "1" + api_key: os.environ/OPENAI_API_KEY -# guardrails: -# - guardrail_name: "gibberish-guard" -# litellm_params: -# guardrail: guardrails_ai -# guard_name: "gibberish_guard" -# mode: "post_call" -# api_base: os.environ/GUARDRAILS_AI_API_BASE assistant_settings: custom_llm_provider: azure litellm_params: api_key: os.environ/AZURE_API_KEY - api_base: os.environ/AZURE_API_BASE \ No newline at end of file + api_base: os.environ/AZURE_API_BASE diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index d9a997ebc..78114b1c4 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -2030,3 +2030,8 @@ class SpecialHeaders(enum.Enum): openai_authorization = "Authorization" azure_authorization = "API-Key" anthropic_authorization = "x-api-key" + + +class LitellmDataForBackendLLMCall(TypedDict, total=False): + headers: dict + organization: str diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 31e9d219e..5630160f9 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -9,6 +9,7 @@ from litellm._logging import verbose_logger, verbose_proxy_logger from litellm.proxy._types import ( AddTeamCallback, CommonProxyErrors, + LitellmDataForBackendLLMCall, LiteLLMRoutes, SpecialHeaders, TeamCallbackMetadata, @@ -172,9 +173,44 @@ def get_forwardable_headers( "x-stainless" ): # causes openai sdk to fail forwarded_headers[header] = value + return forwarded_headers +def get_openai_org_id_from_headers( + headers: dict, general_settings: Optional[Dict] = None +) -> Optional[str]: + """ + Get the OpenAI Org ID from the headers. 
+ """ + if ( + general_settings is not None + and general_settings.get("forward_openai_org_id") is not True + ): + return None + for header, value in headers.items(): + if header.lower() == "openai-organization": + return value + return None + + +def add_litellm_data_for_backend_llm_call( + headers: dict, general_settings: Optional[Dict[str, Any]] = None +) -> LitellmDataForBackendLLMCall: + """ + - Adds forwardable headers + - Adds org id + """ + data = LitellmDataForBackendLLMCall() + _headers = get_forwardable_headers(headers) + if _headers != {}: + data["headers"] = _headers + _organization = get_openai_org_id_from_headers(headers, general_settings) + if _organization is not None: + data["organization"] = _organization + return data + + async def add_litellm_data_to_request( data: dict, request: Request, @@ -210,8 +246,8 @@ async def add_litellm_data_to_request( ), ) - if get_forwardable_headers(_headers) != {}: - data["headers"] = get_forwardable_headers(_headers) + data.update(add_litellm_data_for_backend_llm_call(_headers, general_settings)) + # Include original request and headers in the data data["proxy_server_request"] = { "url": str(request.url), diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index d8a5e0cc8..18d51fe41 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -19,6 +19,7 @@ from typing import ( List, Optional, Tuple, + cast, get_args, get_origin, get_type_hints, @@ -7313,18 +7314,40 @@ async def model_info_v1( ``` """ - global llm_model_list, general_settings, user_config_file_path, proxy_config, llm_router + global llm_model_list, general_settings, user_config_file_path, proxy_config, llm_router, user_model + + if user_model is not None: + # user is trying to get specific model from litellm router + try: + model_info: Dict = cast(Dict, litellm.get_model_info(model=user_model)) + except Exception: + model_info = {} + _deployment_info = Deployment( + model_name="*", + litellm_params=LiteLLM_Params( + model=user_model, + ), + model_info=model_info, + ) + _deployment_info_dict = _deployment_info.model_dump() + _deployment_info_dict = remove_sensitive_info_from_deployment( + deployment_dict=_deployment_info_dict + ) + return {"data": _deployment_info_dict} if llm_model_list is None: raise HTTPException( - status_code=500, detail={"error": "LLM Model List not loaded in"} + status_code=500, + detail={ + "error": "LLM Model List not loaded in. Make sure you passed models in your config.yaml or on the LiteLLM Admin UI. - https://docs.litellm.ai/docs/proxy/configs" + }, ) if llm_router is None: raise HTTPException( status_code=500, detail={ - "error": "LLM Router is not loaded in. Make sure you passed models in your config.yaml or on the LiteLLM Admin UI." + "error": "LLM Router is not loaded in. Make sure you passed models in your config.yaml or on the LiteLLM Admin UI. 
- https://docs.litellm.ai/docs/proxy/configs" }, ) diff --git a/litellm/utils.py b/litellm/utils.py index c38bd7182..935887637 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -927,7 +927,7 @@ def client(original_function): ) # [OPTIONAL] ADD TO CACHE - _llm_caching_handler._sync_set_cache( + _llm_caching_handler.sync_set_cache( result=result, args=args, kwargs=kwargs, @@ -1126,7 +1126,7 @@ def client(original_function): ) ## Add response to cache - await _llm_caching_handler._async_set_cache( + await _llm_caching_handler.async_set_cache( result=result, original_function=original_function, kwargs=kwargs, diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index d7182474d..5e993f7c8 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -732,3 +732,18 @@ def test_drop_nested_params_add_prop_and_strict(provider, model): ) _check_additional_properties(optional_params["tools"]) + + +def test_hosted_vllm_tool_param(): + """ + Relevant issue - https://github.com/BerriAI/litellm/issues/6228 + """ + optional_params = get_optional_params( + model="my-vllm-model", + custom_llm_provider="hosted_vllm", + temperature=0.2, + tools=None, + tool_choice=None, + ) + assert "tools" not in optional_params + assert "tool_choice" not in optional_params diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py index b6ea36c38..3fbcb676f 100644 --- a/tests/local_testing/test_caching.py +++ b/tests/local_testing/test_caching.py @@ -2298,3 +2298,70 @@ def test_basic_caching_import(): assert Cache is not None print("Cache imported successfully") + + +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.asyncio() +async def test_caching_kwargs_input(sync_mode): + from litellm import acompletion + from litellm.caching.caching_handler import LLMCachingHandler + from litellm.types.utils import ( + Choices, + EmbeddingResponse, + Message, + ModelResponse, + Usage, + CompletionTokensDetails, + PromptTokensDetails, + ) + from datetime import datetime + + llm_caching_handler = LLMCachingHandler( + original_function=acompletion, request_kwargs={}, start_time=datetime.now() + ) + + input = { + "result": ModelResponse( + id="chatcmpl-AJ119H5XsDnYiZPp5axJ5d7niwqeR", + choices=[ + Choices( + finish_reason="stop", + index=0, + message=Message( + content="Hello! I'm just a computer program, so I don't have feelings, but I'm here to assist you. 
How can I help you today?", + role="assistant", + tool_calls=None, + function_call=None, + ), + ) + ], + created=1729095507, + model="gpt-3.5-turbo-0125", + object="chat.completion", + system_fingerprint=None, + usage=Usage( + completion_tokens=31, + prompt_tokens=16, + total_tokens=47, + completion_tokens_details=CompletionTokensDetails( + audio_tokens=None, reasoning_tokens=0 + ), + prompt_tokens_details=PromptTokensDetails( + audio_tokens=None, cached_tokens=0 + ), + ), + service_tier=None, + ), + "kwargs": { + "messages": [{"role": "user", "content": "42HHey, how's it going?"}], + "caching": True, + "litellm_call_id": "fae2aa4f-9f75-4f11-8c9c-63ab8d9fae26", + "preset_cache_key": "2f69f5640d5e0f25315d0e132f1278bb643554d14565d2c61d61564b10ade90f", + }, + "args": ("gpt-3.5-turbo",), + } + if sync_mode is True: + llm_caching_handler.sync_set_cache(**input) + else: + input["original_function"] = acompletion + await llm_caching_handler.async_set_cache(**input) diff --git a/tests/local_testing/test_proxy_server.py b/tests/local_testing/test_proxy_server.py index cc8be92bd..3dbe417ea 100644 --- a/tests/local_testing/test_proxy_server.py +++ b/tests/local_testing/test_proxy_server.py @@ -1796,3 +1796,81 @@ async def test_proxy_model_group_info_rerank(prisma_client): print(resp) models = resp["data"] assert models[0].mode == "rerank" + + +# @pytest.mark.asyncio +# async def test_proxy_team_member_add(prisma_client): +# """ +# Add 10 people to a team. Confirm all 10 are added. +# """ +# from litellm.proxy.management_endpoints.team_endpoints import ( +# team_member_add, +# new_team, +# ) +# from litellm.proxy._types import TeamMemberAddRequest, Member, NewTeamRequest + +# setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# try: + +# async def test(): +# await litellm.proxy.proxy_server.prisma_client.connect() +# from litellm.proxy.proxy_server import user_api_key_cache + +# user_api_key_dict = UserAPIKeyAuth( +# user_role=LitellmUserRoles.PROXY_ADMIN, +# api_key="sk-1234", +# user_id="1234", +# ) + +# new_team() +# for _ in range(10): +# request = TeamMemberAddRequest( +# team_id="1234", +# member=Member( +# user_id="1234", +# user_role=LitellmUserRoles.INTERNAL_USER, +# ), +# ) +# key = await team_member_add( +# request, user_api_key_dict=user_api_key_dict +# ) + +# print(key) +# user_id = key.user_id + +# # check /user/info to verify user_role was set correctly +# new_user_info = await user_info( +# user_id=user_id, user_api_key_dict=user_api_key_dict +# ) +# new_user_info = new_user_info.user_info +# print("new_user_info=", new_user_info) +# assert new_user_info["user_role"] == LitellmUserRoles.INTERNAL_USER +# assert new_user_info["user_id"] == user_id + +# generated_key = key.key +# bearer_token = "Bearer " + generated_key + +# assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict + +# value_from_prisma = await prisma_client.get_data( +# token=generated_key, +# ) +# print("token from prisma", value_from_prisma) + +# request = Request( +# { +# "type": "http", +# "route": api_route, +# "path": api_route.path, +# "headers": [("Authorization", bearer_token)], +# } +# ) + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# print("result from user auth with new key", result) + +# asyncio.run(test()) +# except Exception as e: +# pytest.fail(f"An exception occurred - {str(e)}") diff --git a/tests/local_testing/test_proxy_utils.py 
b/tests/local_testing/test_proxy_utils.py index 321f0ee8d..5bb9bdc16 100644 --- a/tests/local_testing/test_proxy_utils.py +++ b/tests/local_testing/test_proxy_utils.py @@ -368,3 +368,41 @@ def test_is_request_body_safe_model_enabled( error_raised = True assert expect_error == error_raised + + +def test_reading_openai_org_id_from_headers(): + from litellm.proxy.litellm_pre_call_utils import get_openai_org_id_from_headers + + headers = { + "OpenAI-Organization": "test_org_id", + } + org_id = get_openai_org_id_from_headers(headers) + assert org_id == "test_org_id" + + +@pytest.mark.parametrize( + "headers, expected_data", + [ + ({"OpenAI-Organization": "test_org_id"}, {"organization": "test_org_id"}), + ({"openai-organization": "test_org_id"}, {"organization": "test_org_id"}), + ({}, {}), + ( + { + "OpenAI-Organization": "test_org_id", + "Authorization": "Bearer test_token", + }, + { + "organization": "test_org_id", + }, + ), + ], +) +def test_add_litellm_data_for_backend_llm_call(headers, expected_data): + import json + from litellm.proxy.litellm_pre_call_utils import ( + add_litellm_data_for_backend_llm_call, + ) + + data = add_litellm_data_for_backend_llm_call(headers) + + assert json.dumps(data, sort_keys=True) == json.dumps(expected_data, sort_keys=True)
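Note for reviewers: below is a minimal, standalone sketch of the positional-to-keyword mapping technique that the new `convert_args_to_kwargs` helper in `litellm/caching/caching_handler.py` relies on. It is illustrative only: the `result` and `kwargs` parameters of the actual helper are dropped here because they do not affect the mapping, and `sample_completion` is a hypothetical stand-in for `litellm.acompletion`.

```python
import inspect
from typing import Any, Callable, Dict, Optional, Tuple


def map_positional_args_to_kwargs(
    original_function: Callable,
    args: Optional[Tuple[Any, ...]] = None,
) -> Dict[str, Any]:
    """Map positional args onto the parameter names of ``original_function``."""
    # Read parameter names in declaration order from the function signature.
    param_names = list(inspect.signature(original_function).parameters.keys())

    args_to_kwargs: Dict[str, Any] = {}
    if args:
        for index, arg in enumerate(args):
            # Ignore extra positional values that have no matching parameter.
            if index < len(param_names):
                args_to_kwargs[param_names[index]] = arg
    return args_to_kwargs


# Hypothetical stand-in for litellm.acompletion, used only for illustration.
def sample_completion(model: str, messages: list, caching: bool = False):
    ...


if __name__ == "__main__":
    # A caller that passes the model positionally reaches the caching handler
    # with args=("gpt-3.5-turbo",) and no "model" key in kwargs. The mapping
    # recovers {"model": "gpt-3.5-turbo"} so the cache entry is keyed the same
    # way as for calls that pass model="gpt-3.5-turbo" as a keyword argument.
    mapped = map_positional_args_to_kwargs(
        original_function=sample_completion,
        args=("gpt-3.5-turbo",),
    )
    print(mapped)  # {'model': 'gpt-3.5-turbo'}
```

In the patch itself, the mapped values are merged into `kwargs` via `kwargs.update(...)` before `litellm.cache.add_cache` / `async_add_cache` runs, which is what fixes https://github.com/BerriAI/litellm/issues/6264 for calls that supply arguments positionally.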