From 1cd1d23fdf190a63281d1ca9ceb80e31c2a1d20a Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 24 Oct 2024 19:01:41 -0700
Subject: [PATCH] LiteLLM Minor Fixes & Improvements (10/23/2024) (#6407)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs(bedrock.md): clarify bedrock auth in litellm docs

* fix(convert_dict_to_response.py): Fixes https://github.com/BerriAI/litellm/issues/6387

* feat(pattern_match_deployments.py): more robust handling for wildcard routes (model_name: custom_route/* -> openai/*)

Enables exposing custom routes to users, with dynamic handling

* test: add more testing

* docs(custom_pricing.md): add debug tutorial for custom pricing

* test: skip codestral test - unreachable backend

* test: fix test

* fix(pattern_matching_deployments.py): fix typing

* test: cleanup codestral tests - backend api unavailable

* (refactor) prometheus async_log_success_event to be under 100 LOC (#6416)

* unit testing for prometheus

* unit testing for success metrics

* use 1 helper for _increment_token_metrics

* use helper for _increment_remaining_budget_metrics

* use _increment_remaining_budget_metrics

* use _increment_top_level_request_and_spend_metrics

* use helper for _set_latency_metrics

* remove noqa violation

* fix test prometheus

* test prometheus

* unit testing for all prometheus helper functions

* fix prom unit tests

* fix unit tests prometheus

* fix unit test prom

* (refactor) router - use static methods for client init utils (#6420)

* use InitalizeOpenAISDKClient

* use InitalizeOpenAISDKClient static method

* fix # noqa: PLR0915

* (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406)

* code cleanup remove unused and undocumented code files

* fix unused logging integrations cleanup

* bump: version 1.50.3 → 1.50.4

---------

Co-authored-by: Ishaan Jaff
---
 docs/my-website/docs/providers/bedrock.md     | 14 ++--
 docs/my-website/docs/proxy/custom_pricing.md  | 32 ++++++++-
 .../convert_dict_to_response.py               | 42 +++++++----
 litellm/llms/OpenAI/openai.py                 |  8 +--
 .../router_utils/pattern_match_deployments.py | 50 ++++++++++++--
 litellm/types/utils.py                        | 12 +++-
 .../test_convert_dict_to_chat_completion.py   | 38 ++++++++++
 .../test_router_pattern_matching.py           |  8 +--
 .../test_router_helper_utils.py               | 69 +++++++++++++++++++
 9 files changed, 235 insertions(+), 38 deletions(-)

diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md
index 279098d12..afd1fee39 100644
--- a/docs/my-website/docs/providers/bedrock.md
+++ b/docs/my-website/docs/providers/bedrock.md
@@ -9,12 +9,11 @@ LiteLLM requires `boto3` to be installed on your system for Bedrock requests
 pip install boto3>=1.28.57
 ```
 
-## Required Environment Variables
-```python
-os.environ["AWS_ACCESS_KEY_ID"] = "" # Access key
-os.environ["AWS_SECRET_ACCESS_KEY"] = "" # Secret access key
-os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
-```
+:::info
+
+LiteLLM uses boto3 to handle authentication. All these options are supported - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#credentials.
+
+:::
 
 ## Usage
 
@@ -22,6 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
 Open In Colab
+
 ```python
 import os
 from litellm import completion
@@ -38,7 +38,7 @@ response = completion(
 
 ## LiteLLM Proxy Usage
 
-Here's how to call Anthropic with the LiteLLM Proxy Server
+Here's how to call Bedrock with the LiteLLM Proxy Server
 
 ### 1. Setup config.yaml
diff --git a/docs/my-website/docs/proxy/custom_pricing.md b/docs/my-website/docs/proxy/custom_pricing.md
index 8bc42d8ff..16d634dee 100644
--- a/docs/my-website/docs/proxy/custom_pricing.md
+++ b/docs/my-website/docs/proxy/custom_pricing.md
@@ -57,4 +57,34 @@ model_list:
       api_version: os.envrion/AZURE_API_VERSION
       input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
       output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
-```
\ No newline at end of file
+```
+
+### Debugging
+
+If your custom pricing is not being used, or you're seeing errors, please check the following:
+
+1. Run the proxy with `LITELLM_LOG="DEBUG"` or the `--detailed_debug` cli flag
+
+```bash
+litellm --config /path/to/config.yaml --detailed_debug
+```
+
+2. Check logs for this line:
+
+```
+LiteLLM:DEBUG: utils.py:263 - litellm.acompletion
+```
+
+3. Check if 'input_cost_per_token' and 'output_cost_per_token' are passed as top-level keys to the acompletion function.
+
+```python
+acompletion(
+    ...,
+    input_cost_per_token=my_custom_price,
+    output_cost_per_token=my_custom_price,
+)
+```
+
+If these keys are not present, LiteLLM will not use your custom pricing.
+
+If the problem persists, please file an issue on [GitHub](https://github.com/BerriAI/litellm/issues).
\ No newline at end of file
diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
index fe69d837c..95749037d 100644
--- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
+++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py
@@ -214,6 +214,28 @@ def _handle_invalid_parallel_tool_calls(
         return tool_calls
 
 
+class LiteLLMResponseObjectHandler:
+
+    @staticmethod
+    def convert_to_image_response(
+        response_object: dict,
+        model_response_object: Optional[ImageResponse] = None,
+        hidden_params: Optional[dict] = None,
+    ) -> ImageResponse:
+
+        response_object.update({"hidden_params": hidden_params})
+
+        if model_response_object is None:
+            model_response_object = ImageResponse(**response_object)
+            return model_response_object
+        else:
+            model_response_dict = model_response_object.model_dump()
+
+            model_response_dict.update(response_object)
+            model_response_object = ImageResponse(**model_response_dict)
+            return model_response_object
+
+
 def convert_to_model_response_object(  # noqa: PLR0915
     response_object: Optional[dict] = None,
     model_response_object: Optional[
@@ -238,7 +260,6 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ] = None,  # used for supporting 'json_schema' on older models
 ):
     received_args = locals()
-
     additional_headers = get_response_headers(_response_headers)
 
     if hidden_params is None:
@@ -427,20 +448,11 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ):
         if response_object is None:
             raise Exception("Error in response object format")
-
-        if model_response_object is None:
-            model_response_object = ImageResponse()
-
-        if "created" in response_object:
-            model_response_object.created = response_object["created"]
-
-        if "data" in response_object:
-            model_response_object.data = response_object["data"]
-
-        if hidden_params is not None:
-            model_response_object._hidden_params = hidden_params
-
-        return model_response_object
+        return LiteLLMResponseObjectHandler.convert_to_image_response(
+            response_object=response_object,
+            model_response_object=model_response_object,
+            hidden_params=hidden_params,
+        )
     elif response_type == "audio_transcription" and (
         model_response_object is None
         or isinstance(model_response_object, TranscriptionResponse)
diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py
index 9cb4beca9..008296fe7 100644
--- a/litellm/llms/OpenAI/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -1349,7 +1349,7 @@ class OpenAIChatCompletion(BaseLLM):
             if aimg_generation is True:
                 return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore
 
-            openai_client = self._get_openai_client(
+            openai_client: OpenAI = self._get_openai_client(  # type: ignore
                 is_async=False,
                 api_key=api_key,
                 api_base=api_base,
@@ -1371,8 +1371,9 @@
             )
 
             ## COMPLETION CALL
-            response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
-            response = response.model_dump()  # type: ignore
+            _response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
+
+            response = _response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -1380,7 +1381,6 @@
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )
-            # return response
             return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
diff --git a/litellm/router_utils/pattern_match_deployments.py b/litellm/router_utils/pattern_match_deployments.py
index e92049fac..a0d631bf7 100644
--- a/litellm/router_utils/pattern_match_deployments.py
+++ b/litellm/router_utils/pattern_match_deployments.py
@@ -4,6 +4,7 @@ Class to handle llm wildcard routing and regex pattern matching
 
 import copy
 import re
+from re import Match
 from typing import Dict, List, Optional
 
 from litellm import get_llm_provider
@@ -53,11 +54,12 @@ class PatternMatchRouter:
         Returns:
             str: regex pattern
         """
-        # Replace '*' with '.*' for regex matching
-        regex = pattern.replace("*", ".*")
-        # Escape other special characters
-        regex = re.escape(regex).replace(r"\.\*", ".*")
-        return f"^{regex}$"
+        # # Replace '*' with '.*' for regex matching
+        # regex = pattern.replace("*", ".*")
+        # # Escape other special characters
+        # regex = re.escape(regex).replace(r"\.\*", ".*")
+        # return f"^{regex}$"
+        return re.escape(pattern).replace(r"\*", "(.*)")
 
     def route(self, request: Optional[str]) -> Optional[List[Dict]]:
         """
@@ -84,6 +86,44 @@ class PatternMatchRouter:
 
         return None  # No matching pattern found
 
+    @staticmethod
+    def set_deployment_model_name(
+        matched_pattern: Match,
+        litellm_deployment_litellm_model: str,
+    ) -> str:
+        """
+        Set the model name for the matched pattern llm deployment
+
+        E.g.:
+
+        model_name: llmengine/* (can be any regex pattern or wildcard pattern)
+        litellm_params:
+            model: openai/*
+
+        if model_name = "llmengine/foo" -> model = "openai/foo"
+        """
+        ## BASE CASE: if the deployment model name does not contain a wildcard, return the deployment model name
+        if "*" not in litellm_deployment_litellm_model:
+            return litellm_deployment_litellm_model
+
+        wildcard_count = litellm_deployment_litellm_model.count("*")
+
+        # Extract all dynamic segments from the request
+        dynamic_segments = matched_pattern.groups()
+
+        if len(dynamic_segments) > wildcard_count:
+            raise ValueError(
+                f"More dynamic segments in the request than wildcards in the deployment model name. Wildcard count: {wildcard_count}, dynamic segments count: {len(dynamic_segments)}"
+            )
+
+        # Replace the corresponding wildcards in the litellm model pattern with extracted segments
+        for segment in dynamic_segments:
+            litellm_deployment_litellm_model = litellm_deployment_litellm_model.replace(
+                "*", segment, 1
+            )
+
+        return litellm_deployment_litellm_model
+
     def get_pattern(
         self, model: str, custom_llm_provider: Optional[str] = None
     ) -> Optional[List[Dict]]:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 341c9fc8b..8cc0844b3 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1177,12 +1177,15 @@ from openai.types.images_response import ImagesResponse as OpenAIImageResponse
 
 class ImageResponse(OpenAIImageResponse):
     _hidden_params: dict = {}
+    usage: Usage
 
     def __init__(
         self,
         created: Optional[int] = None,
         data: Optional[List[ImageObject]] = None,
         response_ms=None,
+        usage: Optional[Usage] = None,
+        hidden_params: Optional[dict] = None,
     ):
         if response_ms:
             _response_ms = response_ms
@@ -1204,8 +1207,13 @@ class ImageResponse(OpenAIImageResponse):
                 _data.append(ImageObject(**d))
             elif isinstance(d, BaseModel):
                 _data.append(ImageObject(**d.model_dump()))
-        super().__init__(created=created, data=_data)
-        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+        _usage = usage or Usage(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+        )
+        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
+        self._hidden_params = hidden_params or {}
 
     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
diff --git a/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py b/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
index 20d21a39d..a1d13bcb3 100644
--- a/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
+++ b/tests/llm_translation/test_llm_response_utils/test_convert_dict_to_chat_completion.py
@@ -695,3 +695,41 @@ def test_convert_to_model_response_object_error():
             _response_headers=None,
             convert_tool_call_to_json_mode=False,
         )
+
+
+def test_image_generation_openai_with_pydantic_warning(caplog):
+    try:
+        import logging
+        from litellm.types.utils import ImageResponse, ImageObject
+
+        convert_response_args = {
+            "response_object": {
+                "created": 1729709945,
+                "data": [
+                    {
+                        "b64_json": None,
+                        "revised_prompt": "Generate an image of a baby sea otter. It should look incredibly cute, with big, soulful eyes and a fluffy, wet fur coat. The sea otter should be on its back, as sea otters often do, with its tiny hands holding onto a shell as if it is its precious toy. The background should be a tranquil sea under a clear sky, with soft sunlight reflecting off the waters. The color palette should be soothing with blues, browns, and white.",
+                        "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ikDc4ex8NB5ZzfTf8m5WYVB7/user-JpwZsbIXubBZvan3Y3GchiiB/img-LL0uoOv4CFJIvNYxoNCKB8oc.png?st=2024-10-23T17%3A59%3A05Z&se=2024-10-23T19%3A59%3A05Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-10-22T19%3A26%3A22Z&ske=2024-10-23T19%3A26%3A22Z&sks=b&skv=2024-08-04&sig=Hl4wczJ3H2vZNdLRt/7JvNi6NvQGDnbNkDy15%2Bl3k5s%3D",
+                    }
+                ],
+            },
+            "model_response_object": ImageResponse(
+                created=1729709929,
+                data=[],
+            ),
+            "response_type": "image_generation",
+            "stream": False,
+            "start_time": None,
+            "end_time": None,
+            "hidden_params": None,
+            "_response_headers": None,
+            "convert_tool_call_to_json_mode": None,
+        }
+
+        resp: ImageResponse = convert_to_model_response_object(**convert_response_args)
+        assert resp is not None
+        assert resp.data is not None
+        assert len(resp.data) == 1
+        assert isinstance(resp.data[0], ImageObject)
+    except Exception as e:
+        pytest.fail(f"Test failed with exception: {e}")
diff --git a/tests/local_testing/test_router_pattern_matching.py b/tests/local_testing/test_router_pattern_matching.py
index d7e76b88b..701a62e41 100644
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@@ -42,7 +42,7 @@ def test_add_pattern():
     )
     router.add_pattern("openai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^openai/.*$"
+    assert list(router.patterns.keys())[0] == "openai/(.*)"
 
     # try getting the pattern
     assert router.route(request="openai/gpt-15") == [
@@ -64,7 +64,7 @@ def test_add_pattern_vertex_ai():
     )
     router.add_pattern("vertex_ai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^vertex_ai/.*$"
+    assert list(router.patterns.keys())[0] == "vertex_ai/(.*)"
 
     # try getting the pattern
     assert router.route(request="vertex_ai/gemini-1.5-flash-latest") == [
@@ -99,10 +99,10 @@ def test_pattern_to_regex():
     Tests that the pattern is converted to a regex
     """
     router = PatternMatchRouter()
-    assert router._pattern_to_regex("openai/*") == "^openai/.*$"
+    assert router._pattern_to_regex("openai/*") == "openai/(.*)"
     assert (
         router._pattern_to_regex("openai/fo::*::static::*")
-        == "^openai/fo::.*::static::.*$"
+        == "openai/fo::(.*)::static::(.*)"
     )
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index a97bf3197..78e322764 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -914,3 +914,72 @@ def test_replace_model_in_jsonl(model_list):
     router = Router(model_list=model_list)
     deployments = router.pattern_router.get_deployments_by_pattern(model="claude-3")
     assert deployments is not None
+
+
+# def test_pattern_match_deployments(model_list):
+#     from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+#     import re
+
+#     patter_router = PatternMatchRouter()
+
+#     request = "fo::hi::static::hello"
+#     model_name = "fo::*:static::*"
+
+#     model_name_regex = patter_router._pattern_to_regex(model_name)
+
+#     # Match against the request
+#     match = re.match(model_name_regex, request)
+
+#     print(f"match: {match}")
+#     print(f"match.end: {match.end()}")
+#     if match is None:
+#         raise ValueError("Match not found")
+#     updated_model = patter_router.set_deployment_model_name(
+#         matched_pattern=match, litellm_deployment_litellm_model="openai/*"
+#     )
+#     assert updated_model == "openai/fo::hi:static::hello"
+
+
+@pytest.mark.parametrize(
+    "user_request_model, model_name, litellm_model, expected_model",
+    [
+        ("llmengine/foo", "llmengine/*", "openai/foo", "openai/foo"),
+        ("llmengine/foo", "llmengine/*", "openai/*", "openai/foo"),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/fo::*:static::*",
+            "openai/fo::hi:static::hello",
+        ),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/gpt-3.5-turbo",
+            "openai/gpt-3.5-turbo",
+        ),
+    ],
+)
+def test_pattern_match_deployment_set_model_name(
+    user_request_model, model_name, litellm_model, expected_model
+):
+    from re import Match
+    from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+
+    pattern_router = PatternMatchRouter()
+
+    import re
+
+    # Convert model_name into a proper regex
+    model_name_regex = pattern_router._pattern_to_regex(model_name)
+
+    # Match against the request
+    match = re.match(model_name_regex, user_request_model)
+
+    if match is None:
+        raise ValueError("Match not found")
+
+    # Call the set_deployment_model_name function
+    updated_model = pattern_router.set_deployment_model_name(match, litellm_model)
+
+    print(updated_model)  # e.g. "openai/fo::hi:static::hello"
+    assert updated_model == expected_model
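
Reviewer note: below is a minimal usage sketch, not part of the patch, of the wildcard routing this change enables (per the commit message: `model_name: custom_route/* -> openai/*`). The `llmengine/*` route name, the request model, and the `OPENAI_API_KEY` environment variable are illustrative assumptions.

```python
# Sketch only: exercises the wildcard-route handling added in this patch.
import os

from litellm import Router

router = Router(
    model_list=[
        {
            # Custom route exposed to users; "*" is the dynamic segment.
            "model_name": "llmengine/*",
            "litellm_params": {
                # The matched segment is substituted into this wildcard,
                # via PatternMatchRouter.set_deployment_model_name above.
                "model": "openai/*",
                "api_key": os.environ["OPENAI_API_KEY"],  # hypothetical credential
            },
        }
    ]
)

# "llmengine/gpt-4o-mini" matches "llmengine/*", so the deployment model
# resolves to "openai/gpt-4o-mini" before the request is sent.
response = router.completion(
    model="llmengine/gpt-4o-mini",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```

Because `_pattern_to_regex` now captures each wildcard as a regex group instead of a bare `.*`, the captured segments can be substituted into the deployment's `litellm_params.model`, which is what lets a single `openai/*` deployment serve every model behind the custom route.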