From 1cd1d23fdf190a63281d1ca9ceb80e31c2a1d20a Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 24 Oct 2024 19:01:41 -0700
Subject: [PATCH] LiteLLM Minor Fixes & Improvements (10/23/2024) (#6407)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs(bedrock.md): clarify bedrock auth in litellm docs

* fix(convert_dict_to_response.py): Fixes https://github.com/BerriAI/litellm/issues/6387

* feat(pattern_match_deployments.py): more robust handling for wildcard routes (model_name: custom_route/* -> openai/*)

Enables exposing custom routes to users, with dynamic handling

* test: add more testing

* docs(custom_pricing.md): add debug tutorial for custom pricing

* test: skip codestral test - unreachable backend

* test: fix test

* fix(pattern_matching_deployments.py): fix typing

* test: cleanup codestral tests - backend api unavailable

* (refactor) prometheus async_log_success_event to be under 100 LOC (#6416)

* unit testing for prometheus

* unit testing for success metrics

* use 1 helper for _increment_token_metrics

* use helper for _increment_remaining_budget_metrics

* use _increment_remaining_budget_metrics

* use _increment_top_level_request_and_spend_metrics

* use helper for _set_latency_metrics

* remove noqa violation

* fix test prometheus

* test prometheus

* unit testing for all prometheus helper functions

* fix prom unit tests

* fix unit tests prometheus

* fix unit test prom

* (refactor) router - use static methods for client init utils (#6420)

* use InitalizeOpenAISDKClient

* use InitalizeOpenAISDKClient static method

* fix # noqa: PLR0915

* (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406)

* code cleanup remove unused and undocumented code files

* fix unused logging integrations cleanup

* bump: version 1.50.3 → 1.50.4

---------

Co-authored-by: Ishaan Jaff
---
 docs/my-website/docs/providers/bedrock.md     | 14 ++--
 docs/my-website/docs/proxy/custom_pricing.md  | 32 ++++++++-
 .../convert_dict_to_response.py               | 42 +++++++----
 litellm/llms/OpenAI/openai.py                 |  8 +--
 .../router_utils/pattern_match_deployments.py | 50 ++++++++++++--
 litellm/types/utils.py                        | 12 +++-
 .../test_convert_dict_to_chat_completion.py   | 38 ++++++++++
 .../test_router_pattern_matching.py           |  8 +--
 .../test_router_helper_utils.py               | 69 +++++++++++++++++++
 9 files changed, 235 insertions(+), 38 deletions(-)

diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md
index 279098d12..afd1fee39 100644
--- a/docs/my-website/docs/providers/bedrock.md
+++ b/docs/my-website/docs/providers/bedrock.md
@@ -9,12 +9,11 @@ LiteLLM requires `boto3` to be installed on your system for Bedrock requests
 pip install boto3>=1.28.57
 ```
 
-## Required Environment Variables
-```python
-os.environ["AWS_ACCESS_KEY_ID"] = "" # Access key
-os.environ["AWS_SECRET_ACCESS_KEY"] = "" # Secret access key
-os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
-```
+:::info
+
+LiteLLM uses boto3 to handle authentication. All these options are supported - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#credentials.
+
+:::
 
 ## Usage
 
@@ -22,6 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
 Open In Colab
+
 ```python
 import os
 from litellm import completion
@@ -38,7 +38,7 @@ response = completion(
 
 ## LiteLLM Proxy Usage
 
-Here's how to call Anthropic with the LiteLLM Proxy Server
+Here's how to call Bedrock with the LiteLLM Proxy Server
 
 ### 1. Setup config.yaml
diff --git a/docs/my-website/docs/proxy/custom_pricing.md b/docs/my-website/docs/proxy/custom_pricing.md
index 8bc42d8ff..16d634dee 100644
--- a/docs/my-website/docs/proxy/custom_pricing.md
+++ b/docs/my-website/docs/proxy/custom_pricing.md
@@ -57,4 +57,34 @@ model_list:
       api_version: os.envrion/AZURE_API_VERSION
       input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
       output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
-```
\ No newline at end of file
+```
+
+### Debugging
+
+If your custom pricing is not being used, or you're seeing errors, please check the following:
+
+1. Run the proxy with `LITELLM_LOG="DEBUG"` or the `--detailed_debug` cli flag
+
+```bash
+litellm --config /path/to/config.yaml --detailed_debug
+```
+
+2. Check logs for this line:
+
+```
+LiteLLM:DEBUG: utils.py:263 - litellm.acompletion
+```
+
+3. Check if 'input_cost_per_token' and 'output_cost_per_token' are passed as top-level keys to the acompletion function.
+
+```python
+acompletion(
+    ...,
+    input_cost_per_token=my_custom_price,
+    output_cost_per_token=my_custom_price,
+)
+```
+
+If these keys are not present, LiteLLM will not use your custom pricing.
+
+If the problem persists, please file an issue on [GitHub](https://github.com/BerriAI/litellm/issues).
\ No newline at end of file
diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
index fe69d837c..95749037d 100644
--- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
+++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
@@ -214,6 +214,28 @@ def _handle_invalid_parallel_tool_calls(
         return tool_calls
 
 
+class LiteLLMResponseObjectHandler:
+
+    @staticmethod
+    def convert_to_image_response(
+        response_object: dict,
+        model_response_object: Optional[ImageResponse] = None,
+        hidden_params: Optional[dict] = None,
+    ) -> ImageResponse:
+
+        response_object.update({"hidden_params": hidden_params})
+
+        if model_response_object is None:
+            model_response_object = ImageResponse(**response_object)
+            return model_response_object
+        else:
+            model_response_dict = model_response_object.model_dump()
+
+            model_response_dict.update(response_object)
+            model_response_object = ImageResponse(**model_response_dict)
+            return model_response_object
+
+
 def convert_to_model_response_object(  # noqa: PLR0915
     response_object: Optional[dict] = None,
     model_response_object: Optional[
@@ -238,7 +260,6 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ] = None,  # used for supporting 'json_schema' on older models
 ):
     received_args = locals()
-
     additional_headers = get_response_headers(_response_headers)
 
     if hidden_params is None:
@@ -427,20 +448,11 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ):
         if response_object is None:
             raise Exception("Error in response object format")
-
-        if model_response_object is None:
-            model_response_object = ImageResponse()
-
-        if "created" in response_object:
-            model_response_object.created = response_object["created"]
-
-        if "data" in response_object:
-            model_response_object.data = response_object["data"]
-
-        if hidden_params is not None:
-            model_response_object._hidden_params = hidden_params
-
-        return model_response_object
+        return LiteLLMResponseObjectHandler.convert_to_image_response(
+            response_object=response_object,
+            model_response_object=model_response_object,
+            hidden_params=hidden_params,
+        )
     elif response_type == "audio_transcription" and (
         model_response_object is None
         or isinstance(model_response_object, TranscriptionResponse)
diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py
index 9cb4beca9..008296fe7 100644
--- a/litellm/llms/OpenAI/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -1349,7 +1349,7 @@ class OpenAIChatCompletion(BaseLLM):
             if aimg_generation is True:
                 return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore
 
-            openai_client = self._get_openai_client(
+            openai_client: OpenAI = self._get_openai_client(  # type: ignore
                 is_async=False,
                 api_key=api_key,
                 api_base=api_base,
@@ -1371,8 +1371,9 @@
             )
 
             ## COMPLETION CALL
-            response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
-            response = response.model_dump()  # type: ignore
+            _response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
+
+            response = _response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -1380,7 +1381,6 @@
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )
-            # return response
             return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
diff --git a/litellm/router_utils/pattern_match_deployments.py b/litellm/router_utils/pattern_match_deployments.py
index e92049fac..a0d631bf7 100644
--- a/litellm/router_utils/pattern_match_deployments.py
+++ b/litellm/router_utils/pattern_match_deployments.py
@@ -4,6 +4,7 @@ Class to handle llm wildcard routing and regex pattern matching
 
 import copy
 import re
+from re import Match
 from typing import Dict, List, Optional
 
 from litellm import get_llm_provider
@@ -53,11 +54,12 @@ class PatternMatchRouter:
         Returns:
             str: regex pattern
         """
-        # Replace '*' with '.*' for regex matching
-        regex = pattern.replace("*", ".*")
-        # Escape other special characters
-        regex = re.escape(regex).replace(r"\.\*", ".*")
-        return f"^{regex}$"
+        # # Replace '*' with '.*' for regex matching
+        # regex = pattern.replace("*", ".*")
+        # # Escape other special characters
+        # regex = re.escape(regex).replace(r"\.\*", ".*")
+        # return f"^{regex}$"
+        return re.escape(pattern).replace(r"\*", "(.*)")
 
     def route(self, request: Optional[str]) -> Optional[List[Dict]]:
         """
@@ -84,6 +86,44 @@ class PatternMatchRouter:
 
         return None  # No matching pattern found
 
+    @staticmethod
+    def set_deployment_model_name(
+        matched_pattern: Match,
+        litellm_deployment_litellm_model: str,
+    ) -> str:
+        """
+        Set the model name for the matched pattern llm deployment
+
+        E.g.:
+
+        model_name: llmengine/* (can be any regex pattern or wildcard pattern)
+        litellm_params:
+            model: openai/*
+
+        if model_name = "llmengine/foo" -> model = "openai/foo"
+        """
+        ## BASE CASE: if the deployment model name does not contain a wildcard, return the deployment model name
+        if "*" not in litellm_deployment_litellm_model:
+            return litellm_deployment_litellm_model
+
+        wildcard_count = litellm_deployment_litellm_model.count("*")
+
+        # Extract all dynamic segments from the request
+        dynamic_segments = matched_pattern.groups()
+
+        if len(dynamic_segments) > wildcard_count:
+            raise ValueError(
+                f"More dynamic segments in the request than wildcards in the deployment model name. Wildcard count: {wildcard_count}, dynamic segments count: {len(dynamic_segments)}"
+            )
+
+        # Replace the corresponding wildcards in the litellm model pattern with extracted segments
+        for segment in dynamic_segments:
+            litellm_deployment_litellm_model = litellm_deployment_litellm_model.replace(
+                "*", segment, 1
+            )
+
+        return litellm_deployment_litellm_model
+
     def get_pattern(
         self, model: str, custom_llm_provider: Optional[str] = None
     ) -> Optional[List[Dict]]:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 341c9fc8b..8cc0844b3 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1177,12 +1177,15 @@ from openai.types.images_response import ImagesResponse as OpenAIImageResponse
 
 class ImageResponse(OpenAIImageResponse):
     _hidden_params: dict = {}
+    usage: Usage
 
     def __init__(
         self,
         created: Optional[int] = None,
         data: Optional[List[ImageObject]] = None,
         response_ms=None,
+        usage: Optional[Usage] = None,
+        hidden_params: Optional[dict] = None,
     ):
         if response_ms:
             _response_ms = response_ms
@@ -1204,8 +1207,13 @@ class ImageResponse(OpenAIImageResponse):
                 _data.append(ImageObject(**d))
             elif isinstance(d, BaseModel):
                 _data.append(ImageObject(**d.model_dump()))
-        super().__init__(created=created, data=_data)
-        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+        _usage = usage or Usage(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+        )
+        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
+        self._hidden_params = hidden_params or {}
 
     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
diff --git a/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py b/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
index 20d21a39d..a1d13bcb3 100644
--- a/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
+++ b/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
@@ -695,3 +695,41 @@ def test_convert_to_model_response_object_error():
             _response_headers=None,
             convert_tool_call_to_json_mode=False,
         )
+
+
+def test_image_generation_openai_with_pydantic_warning(caplog):
+    try:
+        import logging
+        from litellm.types.utils import ImageResponse, ImageObject
+
+        convert_response_args = {
+            "response_object": {
+                "created": 1729709945,
+                "data": [
+                    {
+                        "b64_json": None,
+                        "revised_prompt": "Generate an image of a baby sea otter. It should look incredibly cute, with big, soulful eyes and a fluffy, wet fur coat. The sea otter should be on its back, as sea otters often do, with its tiny hands holding onto a shell as if it is its precious toy. The background should be a tranquil sea under a clear sky, with soft sunlight reflecting off the waters. The color palette should be soothing with blues, browns, and white.",
+                        "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ikDc4ex8NB5ZzfTf8m5WYVB7/user-JpwZsbIXubBZvan3Y3GchiiB/img-LL0uoOv4CFJIvNYxoNCKB8oc.png?st=2024-10-23T17%3A59%3A05Z&se=2024-10-23T19%3A59%3A05Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-10-22T19%3A26%3A22Z&ske=2024-10-23T19%3A26%3A22Z&sks=b&skv=2024-08-04&sig=Hl4wczJ3H2vZNdLRt/7JvNi6NvQGDnbNkDy15%2Bl3k5s%3D",
+                    }
+                ],
+            },
+            "model_response_object": ImageResponse(
+                created=1729709929,
+                data=[],
+            ),
+            "response_type": "image_generation",
+            "stream": False,
+            "start_time": None,
+            "end_time": None,
+            "hidden_params": None,
+            "_response_headers": None,
+            "convert_tool_call_to_json_mode": None,
+        }
+
+        resp: ImageResponse = convert_to_model_response_object(**convert_response_args)
+        assert resp is not None
+        assert resp.data is not None
+        assert len(resp.data) == 1
+        assert isinstance(resp.data[0], ImageObject)
+    except Exception as e:
+        pytest.fail(f"Test failed with exception: {e}")
diff --git a/tests/local_testing/test_router_pattern_matching.py b/tests/local_testing/test_router_pattern_matching.py
index d7e76b88b..701a62e41 100644
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@@ -42,7 +42,7 @@ def test_add_pattern():
     )
     router.add_pattern("openai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^openai/.*$"
+    assert list(router.patterns.keys())[0] == "openai/(.*)"
 
     # try getting the pattern
     assert router.route(request="openai/gpt-15") == [
@@ -64,7 +64,7 @@ def test_add_pattern_vertex_ai():
     )
     router.add_pattern("vertex_ai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^vertex_ai/.*$"
+    assert list(router.patterns.keys())[0] == "vertex_ai/(.*)"
 
     # try getting the pattern
     assert router.route(request="vertex_ai/gemini-1.5-flash-latest") == [
@@ -99,10 +99,10 @@ def test_pattern_to_regex():
     Tests that the pattern is converted to a regex
     """
     router = PatternMatchRouter()
-    assert router._pattern_to_regex("openai/*") == "^openai/.*$"
+    assert router._pattern_to_regex("openai/*") == "openai/(.*)"
     assert (
         router._pattern_to_regex("openai/fo::*::static::*")
-        == "^openai/fo::.*::static::.*$"
+        == "openai/fo::(.*)::static::(.*)"
     )
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index a97bf3197..78e322764 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -914,3 +914,72 @@ def test_replace_model_in_jsonl(model_list):
     router = Router(model_list=model_list)
     deployments = router.pattern_router.get_deployments_by_pattern(model="claude-3")
     assert deployments is not None
+
+
+# def test_pattern_match_deployments(model_list):
+#     from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+#     import re
+
+#     patter_router = PatternMatchRouter()
+
+#     request = "fo::hi::static::hello"
+#     model_name = "fo::*:static::*"
+
+#     model_name_regex = patter_router._pattern_to_regex(model_name)
+
+#     # Match against the request
+#     match = re.match(model_name_regex, request)
+
+#     print(f"match: {match}")
+#     print(f"match.end: {match.end()}")
+#     if match is None:
+#         raise ValueError("Match not found")
+#     updated_model = patter_router.set_deployment_model_name(
+#         matched_pattern=match, litellm_deployment_litellm_model="openai/*"
+#     )
+#     assert updated_model == "openai/fo::hi:static::hello"
+
+
+@pytest.mark.parametrize(
+    "user_request_model, model_name, litellm_model, expected_model",
+    [
+        ("llmengine/foo", "llmengine/*", "openai/foo", "openai/foo"),
+        ("llmengine/foo", "llmengine/*", "openai/*", "openai/foo"),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/fo::*:static::*",
+            "openai/fo::hi:static::hello",
+        ),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/gpt-3.5-turbo",
+            "openai/gpt-3.5-turbo",
+        ),
+    ],
+)
+def test_pattern_match_deployment_set_model_name(
+    user_request_model, model_name, litellm_model, expected_model
+):
+    from re import Match
+    from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+
+    pattern_router = PatternMatchRouter()
+
+    import re
+
+    # Convert model_name into a proper regex
+    model_name_regex = pattern_router._pattern_to_regex(model_name)
+
+    # Match against the request
+    match = re.match(model_name_regex, user_request_model)
+
+    if match is None:
+        raise ValueError("Match not found")
+
+    # Call the set_deployment_model_name function
+    updated_model = pattern_router.set_deployment_model_name(match, litellm_model)
+
+    print(updated_model)  # e.g. "openai/fo::hi:static::hello"
+    assert updated_model == expected_model
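
Reviewer note: below is a minimal usage sketch, not part of the patch, of the wildcard routing this change enables (per the commit message: `model_name: custom_route/* -> openai/*`). The `llmengine/*` route name, the request model, and the `OPENAI_API_KEY` environment variable are illustrative assumptions.

```python
# Sketch only: exercises the wildcard-route handling added in this patch.
import os

from litellm import Router

router = Router(
    model_list=[
        {
            # Custom route exposed to users; "*" is the dynamic segment.
            "model_name": "llmengine/*",
            "litellm_params": {
                # The matched segment is substituted into this wildcard,
                # via PatternMatchRouter.set_deployment_model_name above.
                "model": "openai/*",
                "api_key": os.environ["OPENAI_API_KEY"],  # hypothetical credential
            },
        }
    ]
)

# "llmengine/gpt-4o-mini" matches "llmengine/*", so the deployment model
# resolves to "openai/gpt-4o-mini" before the request is sent.
response = router.completion(
    model="llmengine/gpt-4o-mini",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```

Because `_pattern_to_regex` now captures each wildcard as a regex group instead of a bare `.*`, the captured segments can be substituted into the deployment's `litellm_params.model`, which is what lets a single `openai/*` deployment serve every model behind the custom route.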