forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/23/2024) (#6407)
* docs(bedrock.md): clarify bedrock auth in litellm docs
* fix(convert_dict_to_response.py): fixes https://github.com/BerriAI/litellm/issues/6387
* feat(pattern_match_deployments.py): more robust handling for wildcard routes (model_name: custom_route/* -> openai/*); enables users to expose custom routes with dynamic handling
* test: add more testing
* docs(custom_pricing.md): add debug tutorial for custom pricing
* test: skip codestral test - unreachable backend
* test: fix test
* fix(pattern_matching_deployments.py): fix typing
* test: clean up codestral tests - backend api unavailable
* (refactor) prometheus async_log_success_event to be under 100 LOC (#6416)
  * unit testing for prometheus
  * unit testing for success metrics
  * use 1 helper for _increment_token_metrics
  * use helper for _increment_remaining_budget_metrics
  * use _increment_remaining_budget_metrics
  * use _increment_top_level_request_and_spend_metrics
  * use helper for _set_latency_metrics
  * remove noqa violation
  * fix test prometheus
  * test prometheus
  * unit testing for all prometheus helper functions
  * fix prometheus unit tests
  * fix unit tests prometheus
  * fix unit test prom
* (refactor) router - use static methods for client init utils (#6420)
  * use InitalizeOpenAISDKClient
  * use InitalizeOpenAISDKClient static method
  * fix # noqa: PLR0915
* (code cleanup) remove unused and undocumented logging integrations - litedebugger, berrispend (#6406)
  * code cleanup: remove unused and undocumented code files
  * fix unused logging integrations cleanup
* bump: version 1.50.3 → 1.50.4

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
parent c04c4a82f1
commit 1cd1d23fdf

9 changed files with 235 additions and 38 deletions
@@ -9,12 +9,11 @@ LiteLLM requires `boto3` to be installed on your system for Bedrock requests

pip install boto3>=1.28.57
```

## Required Environment Variables

```python
os.environ["AWS_ACCESS_KEY_ID"] = ""  # Access key
os.environ["AWS_SECRET_ACCESS_KEY"] = ""  # Secret access key
os.environ["AWS_REGION_NAME"] = ""  # us-east-1, us-east-2, us-west-1, us-west-2
```

:::info

LiteLLM uses boto3 to handle authentication. All these options are supported - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#credentials.

:::

## Usage
@@ -22,6 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2

<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

```python
import os
from litellm import completion

@@ -38,7 +38,7 @@ response = completion(

## LiteLLM Proxy Usage

-Here's how to call Anthropic with the LiteLLM Proxy Server
+Here's how to call Bedrock with the LiteLLM Proxy Server

### 1. Setup config.yaml
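The hunk above cuts off the usage snippet mid-stream; for orientation, a minimal end-to-end Bedrock call would look roughly like the sketch below (the model ID and region are illustrative, not taken from this diff):

```python
import os
from litellm import completion

# Illustrative credentials/region; see the auth note above for all
# boto3-supported options.
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = "us-east-1"

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",  # illustrative model ID
    messages=[{"role": "user", "content": "Hello from Bedrock"}],
)
print(response.choices[0].message.content)
```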
@@ -58,3 +58,33 @@ model_list:

      input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
      output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
```

### Debugging

If your custom pricing is not being used or you're seeing errors, please check the following:

1. Run the proxy with `LITELLM_LOG="DEBUG"` or the `--detailed_debug` CLI flag

```bash
litellm --config /path/to/config.yaml --detailed_debug
```

2. Check logs for this line:

```
LiteLLM:DEBUG: utils.py:263 - litellm.acompletion
```

3. Check if `input_cost_per_token` and `output_cost_per_token` are passed as top-level keyword arguments to the `acompletion` call.

```python
acompletion(
    ...,
    input_cost_per_token=my_custom_price,
    output_cost_per_token=my_custom_price,
)
```

If these keys are not present, LiteLLM will not use your custom pricing.

If the problem persists, please file an issue on [GitHub](https://github.com/BerriAI/litellm/issues).
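For intuition, the two keys above reduce to simple per-token arithmetic when spend is tracked; the following sketch (token counts are made-up values, not from this diff) shows the calculation:

```python
# Per-token prices from the config.yaml example above.
input_cost_per_token = 0.000421
output_cost_per_token = 0.000520

# Hypothetical usage numbers for one request.
prompt_tokens = 120
completion_tokens = 45

# Tracked spend = input tokens * input price + output tokens * output price.
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"tracked spend: ${cost:.6f}")  # tracked spend: $0.073920
```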
@@ -214,6 +214,28 @@ def _handle_invalid_parallel_tool_calls(
    return tool_calls


class LiteLLMResponseObjectHandler:

    @staticmethod
    def convert_to_image_response(
        response_object: dict,
        model_response_object: Optional[ImageResponse] = None,
        hidden_params: Optional[dict] = None,
    ) -> ImageResponse:

        response_object.update({"hidden_params": hidden_params})

        if model_response_object is None:
            model_response_object = ImageResponse(**response_object)
            return model_response_object
        else:
            model_response_dict = model_response_object.model_dump()

            model_response_dict.update(response_object)
            model_response_object = ImageResponse(**model_response_dict)
            return model_response_object


def convert_to_model_response_object(  # noqa: PLR0915
    response_object: Optional[dict] = None,
    model_response_object: Optional[

@@ -238,7 +260,6 @@ def convert_to_model_response_object(  # noqa: PLR0915
    ] = None,  # used for supporting 'json_schema' on older models
):
-    received_args = locals()

    additional_headers = get_response_headers(_response_headers)

    if hidden_params is None:

@@ -427,20 +448,11 @@ def convert_to_model_response_object(  # noqa: PLR0915
    ):
        if response_object is None:
            raise Exception("Error in response object format")

-        if model_response_object is None:
-            model_response_object = ImageResponse()
-
-        if "created" in response_object:
-            model_response_object.created = response_object["created"]
-
-        if "data" in response_object:
-            model_response_object.data = response_object["data"]
-
-        if hidden_params is not None:
-            model_response_object._hidden_params = hidden_params
-
-        return model_response_object
+        return LiteLLMResponseObjectHandler.convert_to_image_response(
+            response_object=response_object,
+            model_response_object=model_response_object,
+            hidden_params=hidden_params,
+        )
    elif response_type == "audio_transcription" and (
        model_response_object is None
        or isinstance(model_response_object, TranscriptionResponse)
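To make the new handler's contract concrete, here is a small usage sketch (the raw dict, URL, and hidden params are illustrative; it assumes `LiteLLMResponseObjectHandler` from the diff above is in scope):

```python
from litellm.types.utils import ImageResponse

# Illustrative raw image-generation payload, shaped like the OpenAI response.
raw_response = {
    "created": 1729709945,
    "data": [{"url": "https://example.com/img.png"}],
}

# With no pre-built model_response_object, the dict is promoted directly
# to a typed ImageResponse (hidden_params ride along).
resp = LiteLLMResponseObjectHandler.convert_to_image_response(
    response_object=raw_response,
    model_response_object=None,
    hidden_params={"request_id": "abc-123"},  # hypothetical value
)
assert isinstance(resp, ImageResponse)
```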
@@ -1349,7 +1349,7 @@ class OpenAIChatCompletion(BaseLLM):
            if aimg_generation is True:
                return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore

-            openai_client = self._get_openai_client(
+            openai_client: OpenAI = self._get_openai_client(  # type: ignore
                is_async=False,
                api_key=api_key,
                api_base=api_base,

@@ -1371,8 +1371,9 @@ class OpenAIChatCompletion(BaseLLM):
            )

            ## COMPLETION CALL
-            response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
-            response = response.model_dump()  # type: ignore
+            _response = openai_client.images.generate(**data, timeout=timeout)  # type: ignore
+
+            response = _response.model_dump()
            ## LOGGING
            logging_obj.post_call(
                input=prompt,

@@ -1380,7 +1381,6 @@ class OpenAIChatCompletion(BaseLLM):
                additional_args={"complete_input_dict": data},
                original_response=response,
            )
-            # return response
            return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation")  # type: ignore
        except OpenAIError as e:
@@ -4,6 +4,7 @@ Class to handle llm wildcard routing and regex pattern matching

import copy
import re
+from re import Match
from typing import Dict, List, Optional

from litellm import get_llm_provider

@@ -53,11 +54,12 @@ class PatternMatchRouter:
        Returns:
            str: regex pattern
        """
-        # Replace '*' with '.*' for regex matching
-        regex = pattern.replace("*", ".*")
-        # Escape other special characters
-        regex = re.escape(regex).replace(r"\.\*", ".*")
-        return f"^{regex}$"
+        # # Replace '*' with '.*' for regex matching
+        # regex = pattern.replace("*", ".*")
+        # # Escape other special characters
+        # regex = re.escape(regex).replace(r"\.\*", ".*")
+        # return f"^{regex}$"
+        return re.escape(pattern).replace(r"\*", "(.*)")

    def route(self, request: Optional[str]) -> Optional[List[Dict]]:
        """
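The practical effect of the new `_pattern_to_regex` is that each `*` becomes a capture group instead of an anchored `.*`; a quick sketch (model names illustrative, behavior matching the updated tests below):

```python
import re

# "openai/*" -> "openai/(.*)" under the new scheme.
regex = re.escape("openai/*").replace(r"\*", "(.*)")
print(regex)  # openai/(.*)

match = re.match(regex, "openai/gpt-4o")
assert match is not None
print(match.group(1))  # gpt-4o (the dynamic segment the router can reuse)
```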
@@ -84,6 +86,44 @@ class PatternMatchRouter:

        return None  # No matching pattern found

    @staticmethod
    def set_deployment_model_name(
        matched_pattern: Match,
        litellm_deployment_litellm_model: str,
    ) -> str:
        """
        Set the model name for the matched pattern llm deployment

        E.g.:

        model_name: llmengine/* (can be any regex pattern or wildcard pattern)
        litellm_params:
            model: openai/*

        if model_name = "llmengine/foo" -> model = "openai/foo"
        """
        ## BASE CASE: if the deployment model name does not contain a wildcard, return the deployment model name
        if "*" not in litellm_deployment_litellm_model:
            return litellm_deployment_litellm_model

        wildcard_count = litellm_deployment_litellm_model.count("*")

        # Extract all dynamic segments from the request
        dynamic_segments = matched_pattern.groups()

        if len(dynamic_segments) > wildcard_count:
            raise ValueError(
                f"More dynamic segments in the request than wildcards in the deployment model name. Wildcard count: {wildcard_count}, dynamic segments count: {len(dynamic_segments)}"
            )

        # Replace the corresponding wildcards in the litellm model pattern with extracted segments
        for segment in dynamic_segments:
            litellm_deployment_litellm_model = litellm_deployment_litellm_model.replace(
                "*", segment, 1
            )

        return litellm_deployment_litellm_model

    def get_pattern(
        self, model: str, custom_llm_provider: Optional[str] = None
    ) -> Optional[List[Dict]]:
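Putting `_pattern_to_regex` and `set_deployment_model_name` together, the intended round trip from the docstring's example looks like this sketch (import path taken from the tests further down):

```python
import re
from litellm.router_utils.pattern_match_deployments import PatternMatchRouter

pattern_router = PatternMatchRouter()

# "llmengine/*" compiles to "llmengine/(.*)"; "foo" is captured.
regex = pattern_router._pattern_to_regex("llmengine/*")
match = re.match(regex, "llmengine/foo")
assert match is not None

# The captured segment replaces the wildcard in the deployment's model.
model = PatternMatchRouter.set_deployment_model_name(
    matched_pattern=match,
    litellm_deployment_litellm_model="openai/*",
)
assert model == "openai/foo"
```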
@@ -1177,12 +1177,15 @@ from openai.types.images_response import ImagesResponse as OpenAIImageResponse

class ImageResponse(OpenAIImageResponse):
    _hidden_params: dict = {}
+    usage: Usage

    def __init__(
        self,
        created: Optional[int] = None,
        data: Optional[List[ImageObject]] = None,
        response_ms=None,
+        usage: Optional[Usage] = None,
+        hidden_params: Optional[dict] = None,
    ):
        if response_ms:
            _response_ms = response_ms

@@ -1204,8 +1207,13 @@ class ImageResponse(OpenAIImageResponse):
                _data.append(ImageObject(**d))
            elif isinstance(d, BaseModel):
                _data.append(ImageObject(**d.model_dump()))
-        super().__init__(created=created, data=_data)
-        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+        _usage = usage or Usage(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+        )
+        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
+        self._hidden_params = hidden_params or {}

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
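As a small sanity sketch of the new constructor defaults (constructor arguments taken from the test below; the printed values are what the diff's defaults imply, not verified output):

```python
from litellm.types.utils import ImageResponse

# With no usage supplied, token counts default to zero and
# hidden params default to an empty dict.
resp = ImageResponse(created=1729709929, data=[])
print(resp.usage.total_tokens)  # 0
print(resp._hidden_params)      # {}
```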
@@ -695,3 +695,41 @@ def test_convert_to_model_response_object_error():
        _response_headers=None,
        convert_tool_call_to_json_mode=False,
    )


def test_image_generation_openai_with_pydantic_warning(caplog):
    try:
        import logging
        from litellm.types.utils import ImageResponse, ImageObject

        convert_response_args = {
            "response_object": {
                "created": 1729709945,
                "data": [
                    {
                        "b64_json": None,
                        "revised_prompt": "Generate an image of a baby sea otter. It should look incredibly cute, with big, soulful eyes and a fluffy, wet fur coat. The sea otter should be on its back, as sea otters often do, with its tiny hands holding onto a shell as if it is its precious toy. The background should be a tranquil sea under a clear sky, with soft sunlight reflecting off the waters. The color palette should be soothing with blues, browns, and white.",
                        "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ikDc4ex8NB5ZzfTf8m5WYVB7/user-JpwZsbIXubBZvan3Y3GchiiB/img-LL0uoOv4CFJIvNYxoNCKB8oc.png?st=2024-10-23T17%3A59%3A05Z&se=2024-10-23T19%3A59%3A05Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-10-22T19%3A26%3A22Z&ske=2024-10-23T19%3A26%3A22Z&sks=b&skv=2024-08-04&sig=Hl4wczJ3H2vZNdLRt/7JvNi6NvQGDnbNkDy15%2Bl3k5s%3D",
                    }
                ],
            },
            "model_response_object": ImageResponse(
                created=1729709929,
                data=[],
            ),
            "response_type": "image_generation",
            "stream": False,
            "start_time": None,
            "end_time": None,
            "hidden_params": None,
            "_response_headers": None,
            "convert_tool_call_to_json_mode": None,
        }

        resp: ImageResponse = convert_to_model_response_object(**convert_response_args)
        assert resp is not None
        assert resp.data is not None
        assert len(resp.data) == 1
        assert isinstance(resp.data[0], ImageObject)
    except Exception as e:
        pytest.fail(f"Test failed with exception: {e}")
@@ -42,7 +42,7 @@ def test_add_pattern():
    )
    router.add_pattern("openai/*", deployment.to_json(exclude_none=True))
    assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^openai/.*$"
+    assert list(router.patterns.keys())[0] == "openai/(.*)"

    # try getting the pattern
    assert router.route(request="openai/gpt-15") == [

@@ -64,7 +64,7 @@ def test_add_pattern_vertex_ai():
    )
    router.add_pattern("vertex_ai/*", deployment.to_json(exclude_none=True))
    assert len(router.patterns) == 1
-    assert list(router.patterns.keys())[0] == "^vertex_ai/.*$"
+    assert list(router.patterns.keys())[0] == "vertex_ai/(.*)"

    # try getting the pattern
    assert router.route(request="vertex_ai/gemini-1.5-flash-latest") == [

@@ -99,10 +99,10 @@ def test_pattern_to_regex():
    Tests that the pattern is converted to a regex
    """
    router = PatternMatchRouter()
-    assert router._pattern_to_regex("openai/*") == "^openai/.*$"
+    assert router._pattern_to_regex("openai/*") == "openai/(.*)"
    assert (
        router._pattern_to_regex("openai/fo::*::static::*")
-        == "^openai/fo::.*::static::.*$"
+        == "openai/fo::(.*)::static::(.*)"
    )
@@ -914,3 +914,72 @@ def test_replace_model_in_jsonl(model_list):
    router = Router(model_list=model_list)
    deployments = router.pattern_router.get_deployments_by_pattern(model="claude-3")
    assert deployments is not None


# def test_pattern_match_deployments(model_list):
#     from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
#     import re

#     patter_router = PatternMatchRouter()

#     request = "fo::hi::static::hello"
#     model_name = "fo::*:static::*"

#     model_name_regex = patter_router._pattern_to_regex(model_name)

#     # Match against the request
#     match = re.match(model_name_regex, request)

#     print(f"match: {match}")
#     print(f"match.end: {match.end()}")
#     if match is None:
#         raise ValueError("Match not found")
#     updated_model = patter_router.set_deployment_model_name(
#         matched_pattern=match, litellm_deployment_litellm_model="openai/*"
#     )
#     assert updated_model == "openai/fo::hi:static::hello"


@pytest.mark.parametrize(
    "user_request_model, model_name, litellm_model, expected_model",
    [
        ("llmengine/foo", "llmengine/*", "openai/foo", "openai/foo"),
        ("llmengine/foo", "llmengine/*", "openai/*", "openai/foo"),
        (
            "fo::hi::static::hello",
            "fo::*::static::*",
            "openai/fo::*:static::*",
            "openai/fo::hi:static::hello",
        ),
        (
            "fo::hi::static::hello",
            "fo::*::static::*",
            "openai/gpt-3.5-turbo",
            "openai/gpt-3.5-turbo",
        ),
    ],
)
def test_pattern_match_deployment_set_model_name(
    user_request_model, model_name, litellm_model, expected_model
):
    from re import Match
    from litellm.router_utils.pattern_match_deployments import PatternMatchRouter

    pattern_router = PatternMatchRouter()

    import re

    # Convert model_name into a proper regex
    model_name_regex = pattern_router._pattern_to_regex(model_name)

    # Match against the request
    match = re.match(model_name_regex, user_request_model)

    if match is None:
        raise ValueError("Match not found")

    # Call the set_deployment_model_name function
    updated_model = pattern_router.set_deployment_model_name(match, litellm_model)

    print(updated_model)  # Expected output: "openai/fo::hi:static::hello"
    assert updated_model == expected_model