LiteLLM Minor Fixes & Improvements (10/23/2024) (#6407)

* docs(bedrock.md): clarify bedrock auth in litellm docs

* fix(convert_dict_to_response.py): Fixes https://github.com/BerriAI/litellm/issues/6387

* feat(pattern_match_deployments.py): more robust handling for wildcard routes (model_name: custom_route/* -> openai/*)

Enables users to expose custom routes with dynamic handling

* test: add more testing

* docs(custom_pricing.md): add debug tutorial for custom pricing

* test: skip codestral test - unreachable backend

* test: fix test

* fix(pattern_matching_deployments.py): fix typing

* test: cleanup codestral tests - backend api unavailable

* (refactor) prometheus async_log_success_event to be under 100 LOC  (#6416)

* unit testing for prometheus

* unit testing for success metrics

* use 1 helper for _increment_token_metrics

* use helper for _increment_remaining_budget_metrics

* use _increment_remaining_budget_metrics

* use _increment_top_level_request_and_spend_metrics

* use helper for _set_latency_metrics

* remove noqa violation

* fix test prometheus

* test prometheus

* unit testing for all prometheus helper functions

* fix prom unit tests

* fix unit tests prometheus

* fix unit test prom

* (refactor) router - use static methods for client init utils  (#6420)

* use InitalizeOpenAISDKClient

* use InitalizeOpenAISDKClient static method

* fix  # noqa: PLR0915

* (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend  (#6406)

* code cleanup remove unused and undocumented code files

* fix unused logging integrations cleanup

* bump: version 1.50.3 → 1.50.4

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Author: Krish Dholakia · 2024-10-24 19:01:41 -07:00 · committed by GitHub
Commit: 1cd1d23fdf (parent: c04c4a82f1)
9 changed files with 235 additions and 38 deletions

File: bedrock.md (provider docs)

@@ -9,12 +9,11 @@ LiteLLM requires `boto3` to be installed on your system for Bedrock requests
 pip install boto3>=1.28.57
 ```
 
-## Required Environment Variables
-```python
-os.environ["AWS_ACCESS_KEY_ID"] = ""  # Access key
-os.environ["AWS_SECRET_ACCESS_KEY"] = ""  # Secret access key
-os.environ["AWS_REGION_NAME"] = ""  # us-east-1, us-east-2, us-west-1, us-west-2
-```
+:::info
+
+LiteLLM uses boto3 to handle authentication. All these options are supported - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#credentials.
+
+:::
 
 ## Usage

@@ -22,6 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
   <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
 </a>
 
 ```python
 import os
 from litellm import completion

@@ -38,7 +38,7 @@ response = completion(
 ## LiteLLM Proxy Usage
 
-Here's how to call Anthropic with the LiteLLM Proxy Server
+Here's how to call Bedrock with the LiteLLM Proxy Server
 
 ### 1. Setup config.yaml
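
A minimal sketch of the auth flow the new `:::info` note describes — boto3 picks the credentials up from the standard AWS environment variables, so nothing auth-specific has to be passed to `completion`; the key values, region, and Bedrock model id below are placeholders:

```python
import os

from litellm import completion

# boto3 reads these standard AWS variables; LiteLLM does not need them passed explicitly
os.environ["AWS_ACCESS_KEY_ID"] = "my-access-key"        # placeholder
os.environ["AWS_SECRET_ACCESS_KEY"] = "my-secret-key"    # placeholder
os.environ["AWS_REGION_NAME"] = "us-east-1"

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",  # example Bedrock model id
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response)
```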

File: custom_pricing.md (proxy docs)

@@ -58,3 +58,33 @@ model_list:
       input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
       output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
 ```
+
+### Debugging
+
+If your custom pricing is not being used, or you're seeing errors, please check the following:
+
+1. Run the proxy with `LITELLM_LOG="DEBUG"` or the `--detailed_debug` cli flag
+
+```bash
+litellm --config /path/to/config.yaml --detailed_debug
+```
+
+2. Check logs for this line:
+
+```
+LiteLLM:DEBUG: utils.py:263 - litellm.acompletion
+```
+
+3. Check if 'input_cost_per_token' and 'output_cost_per_token' are passed as top-level keyword arguments to the acompletion function.
+
+```python
+acompletion(
+    ...,
+    input_cost_per_token=my_custom_price,
+    output_cost_per_token=my_custom_price,
+)
+```
+
+If these keys are not present, LiteLLM will not use your custom pricing.
+
+If the problem persists, please file an issue on [GitHub](https://github.com/BerriAI/litellm/issues).
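
A runnable sketch of the check in step 3 above — the prices are passed as top-level keyword arguments to `acompletion`, and `LITELLM_LOG` turns on the debug line to look for; the model and price values here are only illustrative:

```python
import asyncio
import os

os.environ["LITELLM_LOG"] = "DEBUG"  # enable debug logs (the docs above use this flag for the proxy)

from litellm import acompletion


async def main():
    # input_cost_per_token / output_cost_per_token are top-level kwargs,
    # used ONLY for cost tracking (values are illustrative)
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        input_cost_per_token=0.000421,
        output_cost_per_token=0.000520,
    )
    print(response)


asyncio.run(main())
```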

File: convert_dict_to_response.py

@@ -214,6 +214,28 @@ def _handle_invalid_parallel_tool_calls(
         return tool_calls
 
 
+class LiteLLMResponseObjectHandler:
+
+    @staticmethod
+    def convert_to_image_response(
+        response_object: dict,
+        model_response_object: Optional[ImageResponse] = None,
+        hidden_params: Optional[dict] = None,
+    ) -> ImageResponse:
+
+        response_object.update({"hidden_params": hidden_params})
+
+        if model_response_object is None:
+            model_response_object = ImageResponse(**response_object)
+            return model_response_object
+        else:
+            model_response_dict = model_response_object.model_dump()
+            model_response_dict.update(response_object)
+            model_response_object = ImageResponse(**model_response_dict)
+            return model_response_object
+
+
 def convert_to_model_response_object(  # noqa: PLR0915
     response_object: Optional[dict] = None,
     model_response_object: Optional[

@@ -238,7 +260,6 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ] = None,  # used for supporting 'json_schema' on older models
 ):
     received_args = locals()
-
     additional_headers = get_response_headers(_response_headers)
 
     if hidden_params is None:

@@ -427,20 +448,11 @@ def convert_to_model_response_object(  # noqa: PLR0915
     ):
         if response_object is None:
             raise Exception("Error in response object format")
-
-        if model_response_object is None:
-            model_response_object = ImageResponse()
-
-        if "created" in response_object:
-            model_response_object.created = response_object["created"]
-
-        if "data" in response_object:
-            model_response_object.data = response_object["data"]
-
-        if hidden_params is not None:
-            model_response_object._hidden_params = hidden_params
-
-        return model_response_object
+        return LiteLLMResponseObjectHandler.convert_to_image_response(
+            response_object=response_object,
+            model_response_object=model_response_object,
+            hidden_params=hidden_params,
+        )
     elif response_type == "audio_transcription" and (
         model_response_object is None
         or isinstance(model_response_object, TranscriptionResponse)
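
A short sketch of the new handler's merge semantics, using made-up field values: without a pre-built `ImageResponse`, the provider dict (plus `hidden_params`) is turned into one directly; with a pre-built object, its `model_dump()` is overlaid by the provider dict, so the fresher values win:

```python
from litellm.types.utils import ImageObject, ImageResponse

# Provider-style payload (values made up for illustration)
response_object = {
    "created": 1729709945,
    "data": [{"url": "https://example.com/otter.png", "b64_json": None, "revised_prompt": "a baby sea otter"}],
}

# No pre-built ImageResponse passed in -> an ImageResponse is built straight from the dict
resp = ImageResponse(**response_object, hidden_params={})
assert resp.created == 1729709945
assert isinstance(resp.data[0], ImageObject)

# If a pre-built ImageResponse *is* passed in, the handler calls .model_dump() on it,
# overlays the provider dict on top, and rebuilds it, so provider values win on conflicts.
```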

File: OpenAI image-generation handler (OpenAIChatCompletion)

@@ -1349,7 +1349,7 @@ class OpenAIChatCompletion(BaseLLM):
             if aimg_generation is True:
                 return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore
 
-            openai_client = self._get_openai_client(
+            openai_client: OpenAI = self._get_openai_client(  # type: ignore
                 is_async=False,
                 api_key=api_key,
                 api_base=api_base,

@@ -1371,8 +1371,9 @@ class OpenAIChatCompletion(BaseLLM):
             )
 
             ## COMPLETION CALL
-            response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
-            response = response.model_dump()  # type: ignore
+            _response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
+
+            response = _response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,

@@ -1380,7 +1381,6 @@ class OpenAIChatCompletion(BaseLLM):
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )
-            # return response
             return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:

File: litellm/router_utils/pattern_match_deployments.py

@@ -4,6 +4,7 @@ Class to handle llm wildcard routing and regex pattern matching
 
 import copy
 import re
+from re import Match
 from typing import Dict, List, Optional
 
 from litellm import get_llm_provider

@@ -53,11 +54,12 @@ class PatternMatchRouter:
         Returns:
             str: regex pattern
         """
-        # Replace '*' with '.*' for regex matching
-        regex = pattern.replace("*", ".*")
-        # Escape other special characters
-        regex = re.escape(regex).replace(r"\.\*", ".*")
-        return f"^{regex}$"
+        # # Replace '*' with '.*' for regex matching
+        # regex = pattern.replace("*", ".*")
+        # # Escape other special characters
+        # regex = re.escape(regex).replace(r"\.\*", ".*")
+        # return f"^{regex}$"
+        return re.escape(pattern).replace(r"\*", "(.*)")
 
     def route(self, request: Optional[str]) -> Optional[List[Dict]]:
         """

@@ -84,6 +86,44 @@ class PatternMatchRouter:
 
         return None  # No matching pattern found
 
+    @staticmethod
+    def set_deployment_model_name(
+        matched_pattern: Match,
+        litellm_deployment_litellm_model: str,
+    ) -> str:
+        """
+        Set the model name for the matched pattern llm deployment
+
+        E.g.:
+            model_name: llmengine/* (can be any regex pattern or wildcard pattern)
+            litellm_params:
+                model: openai/*
+
+            if model_name = "llmengine/foo" -> model = "openai/foo"
+        """
+        ## BASE CASE: if the deployment model name does not contain a wildcard, return it as-is
+        if "*" not in litellm_deployment_litellm_model:
+            return litellm_deployment_litellm_model
+
+        wildcard_count = litellm_deployment_litellm_model.count("*")
+
+        # Extract all dynamic segments from the request
+        dynamic_segments = matched_pattern.groups()
+
+        if len(dynamic_segments) > wildcard_count:
+            raise ValueError(
+                f"More wildcards in the deployment model name than the pattern. Wildcard count: {wildcard_count}, dynamic segments count: {len(dynamic_segments)}"
+            )
+
+        # Replace the corresponding wildcards in the litellm model pattern with extracted segments
+        for segment in dynamic_segments:
+            litellm_deployment_litellm_model = litellm_deployment_litellm_model.replace(
+                "*", segment, 1
+            )
+
+        return litellm_deployment_litellm_model
+
     def get_pattern(
         self, model: str, custom_llm_provider: Optional[str] = None
     ) -> Optional[List[Dict]]:
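
Taken together, a small sketch of how the wildcard pieces fit — the same `llmengine/foo -> openai/foo` example used in the docstring above and in the parametrized test further down:

```python
import re

from litellm.router_utils.pattern_match_deployments import PatternMatchRouter

router = PatternMatchRouter()

# "llmengine/*" is compiled to the regex "llmengine/(.*)", so the wildcard segment is captured
model_name_regex = router._pattern_to_regex("llmengine/*")
match = re.match(model_name_regex, "llmengine/foo")
assert match is not None

# The captured segment replaces the wildcard in the deployment's own litellm model
updated_model = router.set_deployment_model_name(
    matched_pattern=match,
    litellm_deployment_litellm_model="openai/*",
)
assert updated_model == "openai/foo"
```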

File: litellm/types/utils.py

@@ -1177,12 +1177,15 @@ from openai.types.images_response import ImagesResponse as OpenAIImageResponse
 
 class ImageResponse(OpenAIImageResponse):
     _hidden_params: dict = {}
+    usage: Usage
 
     def __init__(
         self,
         created: Optional[int] = None,
         data: Optional[List[ImageObject]] = None,
         response_ms=None,
+        usage: Optional[Usage] = None,
+        hidden_params: Optional[dict] = None,
     ):
         if response_ms:
             _response_ms = response_ms

@@ -1204,8 +1207,13 @@ class ImageResponse(OpenAIImageResponse):
                     _data.append(ImageObject(**d))
                 elif isinstance(d, BaseModel):
                     _data.append(ImageObject(**d.model_dump()))
-        super().__init__(created=created, data=_data)
-        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+        _usage = usage or Usage(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+        )
+        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
+        self._hidden_params = hidden_params or {}
 
     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
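
A quick sketch of the new constructor behaviour — `usage` is now a typed `Usage` field defaulting to zeroed token counts instead of a raw dict, and `hidden_params` can be passed straight in; the URL and the `response_cost` key are only illustrative values:

```python
from litellm.types.utils import ImageObject, ImageResponse, Usage

resp = ImageResponse(
    created=1729709945,
    data=[ImageObject(url="https://example.com/otter.png")],
    hidden_params={"response_cost": 0.04},  # illustrative
)

# No usage supplied -> a typed Usage with zeroed token counts, not a plain dict
assert isinstance(resp.usage, Usage)
assert resp.usage.total_tokens == 0
assert resp._hidden_params == {"response_cost": 0.04}
```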

File: tests — convert_to_model_response_object

@@ -695,3 +695,41 @@ def test_convert_to_model_response_object_error():
             _response_headers=None,
             convert_tool_call_to_json_mode=False,
         )
+
+
+def test_image_generation_openai_with_pydantic_warning(caplog):
+    try:
+        import logging
+
+        from litellm.types.utils import ImageResponse, ImageObject
+
+        convert_response_args = {
+            "response_object": {
+                "created": 1729709945,
+                "data": [
+                    {
+                        "b64_json": None,
+                        "revised_prompt": "Generate an image of a baby sea otter. It should look incredibly cute, with big, soulful eyes and a fluffy, wet fur coat. The sea otter should be on its back, as sea otters often do, with its tiny hands holding onto a shell as if it is its precious toy. The background should be a tranquil sea under a clear sky, with soft sunlight reflecting off the waters. The color palette should be soothing with blues, browns, and white.",
+                        "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ikDc4ex8NB5ZzfTf8m5WYVB7/user-JpwZsbIXubBZvan3Y3GchiiB/img-LL0uoOv4CFJIvNYxoNCKB8oc.png?st=2024-10-23T17%3A59%3A05Z&se=2024-10-23T19%3A59%3A05Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-10-22T19%3A26%3A22Z&ske=2024-10-23T19%3A26%3A22Z&sks=b&skv=2024-08-04&sig=Hl4wczJ3H2vZNdLRt/7JvNi6NvQGDnbNkDy15%2Bl3k5s%3D",
+                    }
+                ],
+            },
+            "model_response_object": ImageResponse(
+                created=1729709929,
+                data=[],
+            ),
+            "response_type": "image_generation",
+            "stream": False,
+            "start_time": None,
+            "end_time": None,
+            "hidden_params": None,
+            "_response_headers": None,
+            "convert_tool_call_to_json_mode": None,
+        }
+
+        resp: ImageResponse = convert_to_model_response_object(**convert_response_args)
+        assert resp is not None
+        assert resp.data is not None
+        assert len(resp.data) == 1
+        assert isinstance(resp.data[0], ImageObject)
+
+    except Exception as e:
+        pytest.fail(f"Test failed with exception: {e}")

File: tests — PatternMatchRouter

@@ -42,7 +42,7 @@ def test_add_pattern():
     )
     router.add_pattern("openai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^openai/.*$"
+    assert list(router.patterns.keys())[0] == "openai/(.*)"
 
     # try getting the pattern
     assert router.route(request="openai/gpt-15") == [

@@ -64,7 +64,7 @@ def test_add_pattern_vertex_ai():
     )
     router.add_pattern("vertex_ai/*", deployment.to_json(exclude_none=True))
     assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^vertex_ai/.*$"
+    assert list(router.patterns.keys())[0] == "vertex_ai/(.*)"
 
     # try getting the pattern
     assert router.route(request="vertex_ai/gemini-1.5-flash-latest") == [

@@ -99,10 +99,10 @@ def test_pattern_to_regex():
     Tests that the pattern is converted to a regex
     """
     router = PatternMatchRouter()
-    assert router._pattern_to_regex("openai/*") == "^openai/.*$"
+    assert router._pattern_to_regex("openai/*") == "openai/(.*)"
     assert (
         router._pattern_to_regex("openai/fo::*::static::*")
-        == "^openai/fo::.*::static::.*$"
+        == "openai/fo::(.*)::static::(.*)"
     )

File: tests — router wildcard routing

@@ -914,3 +914,72 @@ def test_replace_model_in_jsonl(model_list):
     router = Router(model_list=model_list)
     deployments = router.pattern_router.get_deployments_by_pattern(model="claude-3")
     assert deployments is not None
+
+
+# def test_pattern_match_deployments(model_list):
+#     from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+#     import re
+
+#     patter_router = PatternMatchRouter()
+#     request = "fo::hi::static::hello"
+#     model_name = "fo::*:static::*"
+
+#     model_name_regex = patter_router._pattern_to_regex(model_name)
+
+#     # Match against the request
+#     match = re.match(model_name_regex, request)
+#     print(f"match: {match}")
+#     print(f"match.end: {match.end()}")
+#     if match is None:
+#         raise ValueError("Match not found")
+
+#     updated_model = patter_router.set_deployment_model_name(
+#         matched_pattern=match, litellm_deployment_litellm_model="openai/*"
+#     )
+#     assert updated_model == "openai/fo::hi:static::hello"
+
+
+@pytest.mark.parametrize(
+    "user_request_model, model_name, litellm_model, expected_model",
+    [
+        ("llmengine/foo", "llmengine/*", "openai/foo", "openai/foo"),
+        ("llmengine/foo", "llmengine/*", "openai/*", "openai/foo"),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/fo::*:static::*",
+            "openai/fo::hi:static::hello",
+        ),
+        (
+            "fo::hi::static::hello",
+            "fo::*::static::*",
+            "openai/gpt-3.5-turbo",
+            "openai/gpt-3.5-turbo",
+        ),
+    ],
+)
+def test_pattern_match_deployment_set_model_name(
+    user_request_model, model_name, litellm_model, expected_model
+):
+    import re
+
+    from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
+
+    pattern_router = PatternMatchRouter()
+
+    # Convert model_name into a proper regex
+    model_name_regex = pattern_router._pattern_to_regex(model_name)
+
+    # Match against the request
+    match = re.match(model_name_regex, user_request_model)
+    if match is None:
+        raise ValueError("Match not found")
+
+    # Call the set_deployment_model_name function
+    updated_model = pattern_router.set_deployment_model_name(match, litellm_model)
+    print(updated_model)  # Expected output: "openai/fo::hi:static::hello"
+    assert updated_model == expected_model