forked from phoenix/litellm-mirror
LiteLLM Minor fixes + improvements (08/04/2024) (#5505)
* Minor IAM AWS OIDC Improvements (#5246)
* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.
* AWS IAM: Include an inline policy, to help reduce misuse of overly permissive IAM roles.
* (test_bedrock_completion.py): Ensure we are testing cross AWS region OIDC flow.
* fix(router.py): log rejected requests
Fixes https://github.com/BerriAI/litellm/issues/5498
* refactor: don't use verbose_logger.exception, if exception is raised
The user might already have handling for this, but alerting systems in prod will flag it as an unhandled error.
* fix(datadog.py): support setting datadog source as an env var
Fixes https://github.com/BerriAI/litellm/issues/5508
* docs(logging.md): add dd_source to datadog docs
* fix(proxy_server.py): expose `/customer/list` endpoint for showing all customers
* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)
* feat(anthropic.py): support 'cache_control' param for content when it is a string
* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)
This reverts commit 3fac0349c2.
* refactor: ci/cd run again
---------
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Parent: cdc312d51d
Commit: 1e7e538261
24 changed files with 383 additions and 247 deletions
@@ -1,12 +1,12 @@
 repos:
 - repo: local
   hooks:
-    - id: mypy
-      name: mypy
-      entry: python3 -m mypy --ignore-missing-imports
-      language: system
-      types: [python]
-      files: ^litellm/
+    # - id: mypy
+    #   name: mypy
+    #   entry: python3 -m mypy --ignore-missing-imports
+    #   language: system
+    #   types: [python]
+    #   files: ^litellm/
     - id: isort
       name: isort
       entry: isort
@@ -1426,6 +1426,7 @@ litellm_settings:
 ```shell
 DD_API_KEY="5f2d0f310***********" # your datadog API Key
 DD_SITE="us5.datadoghq.com" # your datadog base url
+DD_SOURCE="litellm_dev" # [OPTIONAL] your datadog source. use to differentiate dev vs. prod deployments
 ```
 
 **Step 3**: Start the proxy, make a test request
@@ -2039,10 +2039,7 @@ class DualCache(BaseCache):
 
             return result
         except Exception as e:
-            verbose_logger.exception(
-                f"LiteLLM Cache: Excepton async add_cache: {str(e)}"
-            )
-            raise e
+            raise e  # don't log if exception is raised
 
     async def async_set_cache_sadd(
         self, key, value: List, local_only: bool = False, **kwargs
@@ -2069,10 +2066,7 @@ class DualCache(BaseCache):
 
             return None
         except Exception as e:
-            verbose_logger.exception(
-                "LiteLLM Cache: Excepton async set_cache_sadd: {}".format(str(e))
-            )
-            raise e
+            raise e  # don't log, if exception is raised
 
     def flush_cache(self):
         if self.in_memory_cache is not None:
@@ -2543,7 +2537,6 @@ class Cache:
             self.cache.set_cache(cache_key, cached_data, **kwargs)
         except Exception as e:
             verbose_logger.exception(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-            pass
 
     async def async_add_cache(self, result, *args, **kwargs):
         """
@@ -235,10 +235,7 @@ class BraintrustLogger(CustomLogger):
         except httpx.HTTPStatusError as e:
             raise Exception(e.response.text)
         except Exception as e:
-            verbose_logger.exception(
-                "Error logging to braintrust - Exception received - {}".format(str(e))
-            )
-            raise e
+            raise e  # don't use verbose_logger.exception, if exception is raised
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
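Aside (not part of the diff): the pattern applied across these integrations is to re-raise and let the caller decide whether to log, instead of emitting an exception-level log right before raising. A minimal sketch of the before/after behavior, using the standard `logging` module rather than litellm's logger:

```python
import logging

logger = logging.getLogger("litellm_sketch")


def risky_operation():
    raise RuntimeError("boom")


def old_style():
    try:
        risky_operation()
    except Exception as e:
        # Logs a full traceback at ERROR level *and* re-raises, so a caller
        # that already handles the exception still produces an alarming log line.
        logger.exception("operation failed - %s", e)
        raise


def new_style():
    try:
        risky_operation()
    except Exception as e:
        raise e  # don't log here; the caller owns error handling and alerting
```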
@@ -360,10 +357,7 @@ class BraintrustLogger(CustomLogger):
         except httpx.HTTPStatusError as e:
             raise Exception(e.response.text)
         except Exception as e:
-            verbose_logger.exception(
-                "Error logging to braintrust - Exception received - {}".format(str(e))
-            )
-            raise e
+            raise e  # don't use verbose_logger.exception, if exception is raised
 
     def log_failure_event(self, kwargs, response_obj, start_time, end_time):
         return super().log_failure_event(kwargs, response_obj, start_time, end_time)
@@ -1,11 +1,17 @@
 #### What this does ####
 # On success + failure, log events to Datadog
 
-import dotenv, os
-import requests  # type: ignore
-
+import datetime
+import os
+import subprocess
+import sys
 import traceback
-import datetime, subprocess, sys
-import litellm, uuid
+import uuid
+
+import dotenv
+import requests  # type: ignore
+
+import litellm
 from litellm._logging import print_verbose, verbose_logger
 
@@ -57,9 +63,9 @@ class DataDogLogger:
     ):
         try:
             # Define DataDog client
-            from datadog_api_client.v2.api.logs_api import LogsApi
             from datadog_api_client.v2 import ApiClient
-            from datadog_api_client.v2.models import HTTPLogItem, HTTPLog
+            from datadog_api_client.v2.api.logs_api import LogsApi
+            from datadog_api_client.v2.models import HTTPLog, HTTPLogItem
 
             verbose_logger.debug(
                 f"datadog Logging - Enters logging function for model {kwargs}"
@@ -131,7 +137,7 @@ class DataDogLogger:
             body = HTTPLog(
                 [
                     HTTPLogItem(
-                        ddsource="litellm",
+                        ddsource=os.getenv("DD_SOURCE", "litellm"),
                         message=payload,
                         service="litellm-server",
                     ),
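Aside (not part of the diff): with this change the Datadog source can be overridden per deployment via the `DD_SOURCE` env var, falling back to the previous hardcoded value. A minimal sketch of the intended behavior:

```python
import os


def resolve_dd_source() -> str:
    # DD_SOURCE is optional; fall back to the previously hardcoded value.
    return os.getenv("DD_SOURCE", "litellm")


# e.g. differentiate dev vs. prod deployments
os.environ["DD_SOURCE"] = "litellm_dev"
assert resolve_dd_source() == "litellm_dev"

del os.environ["DD_SOURCE"]
assert resolve_dd_source() == "litellm"
```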
@@ -228,6 +228,54 @@ class AnthropicConfig:
 
         return False
 
+    def translate_system_message(
+        self, messages: List[AllMessageValues]
+    ) -> List[AnthropicSystemMessageContent]:
+        system_prompt_indices = []
+        anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
+        for idx, message in enumerate(messages):
+            if message["role"] == "system":
+                valid_content: bool = False
+                system_message_block = ChatCompletionSystemMessage(**message)
+                if isinstance(system_message_block["content"], str):
+                    anthropic_system_message_content = AnthropicSystemMessageContent(
+                        type="text",
+                        text=system_message_block["content"],
+                    )
+                    if "cache_control" in system_message_block:
+                        anthropic_system_message_content["cache_control"] = (
+                            system_message_block["cache_control"]
+                        )
+                    anthropic_system_message_list.append(
+                        anthropic_system_message_content
+                    )
+                    valid_content = True
+                elif isinstance(message["content"], list):
+                    for _content in message["content"]:
+                        anthropic_system_message_content = (
+                            AnthropicSystemMessageContent(
+                                type=_content.get("type"),
+                                text=_content.get("text"),
+                            )
+                        )
+                        if "cache_control" in _content:
+                            anthropic_system_message_content["cache_control"] = (
+                                _content["cache_control"]
+                            )
+
+                        anthropic_system_message_list.append(
+                            anthropic_system_message_content
+                        )
+                    valid_content = True
+
+                if valid_content:
+                    system_prompt_indices.append(idx)
+        if len(system_prompt_indices) > 0:
+            for idx in reversed(system_prompt_indices):
+                messages.pop(idx)
+
+        return anthropic_system_message_list
+
     ### FOR [BETA] `/v1/messages` endpoint support
 
     def translatable_anthropic_params(self) -> List:
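Aside (not part of the diff): a quick usage sketch of the new helper, mirroring the unit test added later in this commit. It requires a litellm build that includes this change; note that matched system messages are popped from the input list in place.

```python
import litellm

system_message = [
    {
        "role": "system",
        "content": "Here is the full text of a complex legal agreement",
        "cache_control": {"type": "ephemeral"},
    },
]

translated = litellm.AnthropicConfig().translate_system_message(messages=system_message)
# translated -> [{"type": "text",
#                 "text": "Here is the full text of a complex legal agreement",
#                 "cache_control": {"type": "ephemeral"}}]
# system_message is now empty, because the system entry was popped in place.
print(translated, system_message)
```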
@@ -314,7 +362,7 @@ class AnthropicConfig:
             new_messages.append(user_message)
 
         if len(new_user_content_list) > 0:
-            new_messages.append({"role": "user", "content": new_user_content_list})
+            new_messages.append({"role": "user", "content": new_user_content_list})  # type: ignore
 
         if len(tool_message_list) > 0:
             new_messages.extend(tool_message_list)
@@ -940,45 +988,11 @@ class AnthropicChatCompletion(BaseLLM):
             )
         else:
             # Separate system prompt from rest of message
-            system_prompt_indices = []
-            system_prompt = ""
-            anthropic_system_message_list = None
-            for idx, message in enumerate(messages):
-                if message["role"] == "system":
-                    valid_content: bool = False
-                    if isinstance(message["content"], str):
-                        system_prompt += message["content"]
-                        valid_content = True
-                    elif isinstance(message["content"], list):
-                        for _content in message["content"]:
-                            anthropic_system_message_content = (
-                                AnthropicSystemMessageContent(
-                                    type=_content.get("type"),
-                                    text=_content.get("text"),
-                                )
-                            )
-                            if "cache_control" in _content:
-                                anthropic_system_message_content["cache_control"] = (
-                                    _content["cache_control"]
-                                )
-
-                            if anthropic_system_message_list is None:
-                                anthropic_system_message_list = []
-                            anthropic_system_message_list.append(
-                                anthropic_system_message_content
-                            )
-                            valid_content = True
-
-                    if valid_content:
-                        system_prompt_indices.append(idx)
-            if len(system_prompt_indices) > 0:
-                for idx in reversed(system_prompt_indices):
-                    messages.pop(idx)
-            if len(system_prompt) > 0:
-                optional_params["system"] = system_prompt
-
+            anthropic_system_message_list = AnthropicConfig().translate_system_message(
+                messages=messages
+            )
             # Handling anthropic API Prompt Caching
-            if anthropic_system_message_list is not None:
+            if len(anthropic_system_message_list) > 0:
                 optional_params["system"] = anthropic_system_message_list
             # Format rest of message according to anthropic guidelines
             try:
@@ -986,15 +1000,10 @@ class AnthropicChatCompletion(BaseLLM):
                     model=model, messages=messages, custom_llm_provider="anthropic"
                 )
             except Exception as e:
-                verbose_logger.exception(
-                    "litellm.llms.anthropic.chat.py::completion() - Exception occurred - {}\nReceived Messages: {}".format(
-                        str(e), messages
-                    )
-                )
                 raise AnthropicError(
                     status_code=400,
                     message="{}\nReceived Messages={}".format(str(e), messages),
-                )
+                )  # don't use verbose_logger.exception, if exception is raised
 
         ## Load Config
         config = litellm.AnthropicConfig.get_config()
@@ -119,8 +119,6 @@ class BaseAWSLLM(BaseLLM):
                     "aws_web_identity_token": aws_web_identity_token,
                     "aws_role_name": aws_role_name,
                     "aws_session_name": aws_session_name,
-                    "aws_region_name": aws_region_name,
-                    "aws_sts_endpoint": sts_endpoint,
                 }
             )
 
@@ -147,6 +145,7 @@ class BaseAWSLLM(BaseLLM):
                 RoleSessionName=aws_session_name,
                 WebIdentityToken=oidc_token,
                 DurationSeconds=3600,
+                Policy='{"Version":"2012-10-17","Statement":[{"Sid":"BedrockLiteLLM","Effect":"Allow","Action":["bedrock:InvokeModel","bedrock:InvokeModelWithResponseStream"],"Resource":"*","Condition":{"Bool":{"aws:SecureTransport":"true"},"StringLike":{"aws:UserAgent":"litellm/*"}}}]}',
             )
 
             iam_creds_dict = {
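Aside (not part of the diff): the inline session policy added above is easier to read when built as a Python dict. This sketch reproduces the same statement as the one-line JSON string in the diff; it is only a formatting aid, the diff itself passes the literal string.

```python
import json

# Same scoped-down session policy as in the diff, built as a dict for readability.
bedrock_session_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "BedrockLiteLLM",
            "Effect": "Allow",
            "Action": [
                "bedrock:InvokeModel",
                "bedrock:InvokeModelWithResponseStream",
            ],
            "Resource": "*",
            "Condition": {
                "Bool": {"aws:SecureTransport": "true"},
                "StringLike": {"aws:UserAgent": "litellm/*"},
            },
        }
    ],
}

# Passed as the `Policy` string when assuming the role, e.g.
# sts_client.assume_role_with_web_identity(..., Policy=json.dumps(bedrock_session_policy))
print(json.dumps(bedrock_session_policy))
```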
@@ -164,6 +163,11 @@ class BaseAWSLLM(BaseLLM):
                 ttl=3600 - 60,
             )
 
+            if sts_response["PackedPolicySize"] > 75:
+                verbose_logger.warning(
+                    f"The policy size is greater than 75% of the allowed size, PackedPolicySize: {sts_response['PackedPolicySize']}"
+                )
+
             session = boto3.Session(**iam_creds_dict)
 
             iam_creds = session.get_credentials()
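Aside (not part of the diff): STS reports `PackedPolicySize` as a percentage of the maximum allowed packed size, so the new warning fires once the inline policy consumes more than 75% of that budget. A tiny sketch of the check in isolation:

```python
def warn_if_policy_large(sts_response: dict) -> bool:
    # PackedPolicySize is a percentage of the allowed packed policy size.
    packed = sts_response.get("PackedPolicySize", 0)
    if packed > 75:
        print(f"policy uses {packed}% of the allowed size")
        return True
    return False


assert warn_if_policy_large({"PackedPolicySize": 80}) is True
assert warn_if_policy_large({"PackedPolicySize": 10}) is False
```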
@@ -423,13 +423,7 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
         async for transformed_chunk in streamwrapper:
             yield transformed_chunk
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
-                str(e)
-            )
-        )
-
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised
 
 
 async def ollama_acompletion(
@@ -498,12 +492,7 @@ async def ollama_acompletion(
         )
         return model_response
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
-                str(e)
-            )
-        )
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised
 
 
 async def ollama_aembeddings(
@@ -583,8 +583,4 @@ async def ollama_acompletion(
         )
         return model_response
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
-        )
-
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised
@@ -168,9 +168,6 @@ def completion(
             choices_list.append(choice_obj)
         model_response.choices = choices_list  # type: ignore
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
-        )
         raise PalmError(
             message=traceback.format_exc(), status_code=response.status_code
         )
@@ -564,12 +564,9 @@ class PredibaseChatCompletion(BaseLLM):
             for exception in litellm.LITELLM_EXCEPTION_TYPES:
                 if isinstance(e, exception):
                     raise e
-            verbose_logger.exception(
-                "litellm.llms.predibase.py::async_completion() - Exception occurred - {}".format(
-                    str(e)
-                )
-            )
-            raise PredibaseError(status_code=500, message="{}".format(str(e)))
+            raise PredibaseError(
+                status_code=500, message="{}".format(str(e))
+            )  # don't use verbose_logger.exception, if exception is raised
         return self.process_response(
             model=model,
             response=response,
@@ -27,10 +27,13 @@ from litellm.types.completion import (
 from litellm.types.llms.anthropic import *
 from litellm.types.llms.bedrock import MessageBlock as BedrockMessageBlock
 from litellm.types.llms.openai import (
+    AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionAssistantToolCall,
     ChatCompletionFunctionMessage,
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolMessage,
+    ChatCompletionUserMessage,
 )
 from litellm.types.utils import GenericImageParsingChunk
 
@@ -493,10 +496,9 @@ def hf_chat_template(model: str, messages: list, chat_template: Optional[Any] =
 
         return rendered_text
     except Exception as e:
-        verbose_logger.exception(
-            "Error rendering huggingface chat template - {}".format(str(e))
-        )
-        raise Exception(f"Error rendering template - {str(e)}")
+        raise Exception(
+            f"Error rendering template - {str(e)}"
+        )  # don't use verbose_logger.exception, if exception is raised
 
 
 # Anthropic template
@@ -1171,7 +1173,9 @@ def convert_to_gemini_tool_call_result(
     return _part
 
 
-def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResultParam:
+def convert_to_anthropic_tool_result(
+    message: Union[dict, ChatCompletionToolMessage, ChatCompletionFunctionMessage]
+) -> AnthropicMessagesToolResultParam:
     """
     OpenAI message with a tool result looks like:
     {
@@ -1215,7 +1219,7 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
         return anthropic_tool_result
     if message["role"] == "function":
         content = message.get("content")  # type: ignore
-        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
+        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())  # type: ignore
         anthropic_tool_result = AnthropicMessagesToolResultParam(
             type="tool_result", tool_use_id=tool_call_id, content=content
         )
@@ -1230,7 +1234,7 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
 
 
 def convert_function_to_anthropic_tool_invoke(
-    function_call,
+    function_call: Union[dict, ChatCompletionToolCallFunctionChunk],
 ) -> List[AnthropicMessagesToolUseParam]:
     try:
         anthropic_tool_invoke = [
@@ -1247,7 +1251,7 @@ def convert_function_to_anthropic_tool_invoke(
 
 
 def convert_to_anthropic_tool_invoke(
-    tool_calls: list,
+    tool_calls: List[ChatCompletionAssistantToolCall],
 ) -> List[AnthropicMessagesToolUseParam]:
     """
     OpenAI tool invokes:
@@ -1307,17 +1311,19 @@ def add_cache_control_to_content(
     anthropic_content_element: Union[
         dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam
     ],
-    orignal_content_element: dict,
+    orignal_content_element: Union[dict, AllMessageValues],
 ):
-    if "cache_control" in orignal_content_element:
-        anthropic_content_element["cache_control"] = orignal_content_element[
-            "cache_control"
-        ]
+    cache_control_param = orignal_content_element.get("cache_control")
+    if cache_control_param is not None and isinstance(cache_control_param, dict):
+        transformed_param = ChatCompletionCachedContent(**cache_control_param)  # type: ignore
+
+        anthropic_content_element["cache_control"] = transformed_param
 
     return anthropic_content_element
 
 
 def anthropic_messages_pt(
-    messages: list,
+    messages: List[AllMessageValues],
     model: str,
     llm_provider: str,
 ) -> List[
@@ -1348,10 +1354,21 @@ def anthropic_messages_pt(
     while msg_i < len(messages):
         user_content: List[AnthropicMessagesUserMessageValues] = []
         init_msg_i = msg_i
+        if isinstance(messages[msg_i], BaseModel):
+            messages[msg_i] = dict(messages[msg_i])  # type: ignore
         ## MERGE CONSECUTIVE USER CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
-            if isinstance(messages[msg_i]["content"], list):
-                for m in messages[msg_i]["content"]:
+            user_message_types_block: Union[
+                ChatCompletionToolMessage,
+                ChatCompletionUserMessage,
+                ChatCompletionFunctionMessage,
+            ] = messages[
+                msg_i
+            ]  # type: ignore
+            if user_message_types_block["content"] and isinstance(
+                user_message_types_block["content"], list
+            ):
+                for m in user_message_types_block["content"]:
                     if m.get("type", "") == "image_url":
                         image_chunk = convert_to_anthropic_image_obj(
                             m["image_url"]["url"]
@@ -1382,15 +1399,24 @@ def anthropic_messages_pt(
                         )
                         user_content.append(anthropic_content_element)
             elif (
-                messages[msg_i]["role"] == "tool"
-                or messages[msg_i]["role"] == "function"
+                user_message_types_block["role"] == "tool"
+                or user_message_types_block["role"] == "function"
             ):
                 # OpenAI's tool message content will always be a string
-                user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
-            else:
                 user_content.append(
-                    {"type": "text", "text": messages[msg_i]["content"]}
+                    convert_to_anthropic_tool_result(user_message_types_block)
                 )
+            elif isinstance(user_message_types_block["content"], str):
+                _anthropic_content_text_element: AnthropicMessagesTextParam = {
+                    "type": "text",
+                    "text": user_message_types_block["content"],
+                }
+                anthropic_content_element = add_cache_control_to_content(
+                    anthropic_content_element=_anthropic_content_text_element,
+                    orignal_content_element=user_message_types_block,
+                )
+
+                user_content.append(anthropic_content_element)
 
             msg_i += 1
 
@@ -1400,10 +1426,11 @@ def anthropic_messages_pt(
         assistant_content: List[AnthropicMessagesAssistantMessageValues] = []
         ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
-            if "content" in messages[msg_i] and isinstance(
-                messages[msg_i]["content"], list
+            assistant_content_block: ChatCompletionAssistantMessage = messages[msg_i]  # type: ignore
+            if "content" in assistant_content_block and isinstance(
+                assistant_content_block["content"], list
             ):
-                for m in messages[msg_i]["content"]:
+                for m in assistant_content_block["content"]:
                     # handle text
                     if (
                         m.get("type", "") == "text" and len(m.get("text", "")) > 0
@@ -1417,35 +1444,37 @@ def anthropic_messages_pt(
                     )
                     assistant_content.append(anthropic_message)
             elif (
-                "content" in messages[msg_i]
-                and isinstance(messages[msg_i]["content"], str)
-                and len(messages[msg_i]["content"])
-                > 0  # don't pass empty text blocks. anthropic api raises errors.
+                "content" in assistant_content_block
+                and isinstance(assistant_content_block["content"], str)
+                and assistant_content_block[
+                    "content"
+                ]  # don't pass empty text blocks. anthropic api raises errors.
             ):
 
                 _anthropic_text_content_element = {
                     "type": "text",
-                    "text": messages[msg_i]["content"],
+                    "text": assistant_content_block["content"],
                 }
 
                 anthropic_content_element = add_cache_control_to_content(
                     anthropic_content_element=_anthropic_text_content_element,
-                    orignal_content_element=messages[msg_i],
+                    orignal_content_element=assistant_content_block,
                 )
                 assistant_content.append(anthropic_content_element)
 
-            if messages[msg_i].get(
-                "tool_calls", []
+            assistant_tool_calls = assistant_content_block.get("tool_calls")
+            if (
+                assistant_tool_calls is not None
             ):  # support assistant tool invoke conversion
                 assistant_content.extend(
-                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
+                    convert_to_anthropic_tool_invoke(assistant_tool_calls)
                 )
 
-            if messages[msg_i].get("function_call"):
+            assistant_function_call = assistant_content_block.get("function_call")
+
+            if assistant_function_call is not None:
                 assistant_content.extend(
-                    convert_function_to_anthropic_tool_invoke(
-                        messages[msg_i]["function_call"]
-                    )
+                    convert_function_to_anthropic_tool_invoke(assistant_function_call)
                 )
 
             msg_i += 1
@@ -491,14 +491,9 @@ class CodestralTextCompletion(BaseLLM):
                 message="HTTPStatusError - {}".format(e.response.text),
             )
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.llms.text_completion_codestral.py::async_completion() - Exception occurred - {}".format(
-                    str(e)
-                )
-            )
             raise TextCompletionCodestralError(
                 status_code=500, message="{}".format(str(e))
-            )
+            )  # don't use verbose_logger.exception, if exception is raised
         return self.process_text_completion_response(
             model=model,
             response=response,
@@ -445,9 +445,6 @@ async def acompletion(
         )  # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
         return response
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.main.py::acompletion() - Exception occurred - {}".format(str(e))
-        )
         custom_llm_provider = custom_llm_provider or "openai"
         raise exception_type(
             model=model,
@@ -616,9 +613,6 @@ def mock_completion(
     except Exception as e:
         if isinstance(e, openai.APIError):
             raise e
-        verbose_logger.exception(
-            "litellm.mock_completion(): Exception occured - {}".format(str(e))
-        )
         raise Exception("Mock completion response failed")
 
 
@@ -5125,9 +5119,6 @@ async def ahealth_check(
         response = {}  # args like remaining ratelimit etc.
         return response
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.ahealth_check(): Exception occured - {}".format(str(e))
-        )
         stack_trace = traceback.format_exc()
         if isinstance(stack_trace, str):
             stack_trace = stack_trace[:1000]
@@ -1,6 +1,16 @@
 model_list:
-  - model_name: "whisper"
+  - model_name: gpt-4o-mini-2024-07-18
     litellm_params:
-      model: "azure/azure-whisper"
-      api_key: os.environ/AZURE_EUROPE_API_KEY
-      api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
+      api_key: API_KEY
+      model: openai/gpt-4o-mini-2024-07-18
+      rpm: 0
+      tpm: 100
+
+router_settings:
+  num_retries: 0
+  routing_strategy: usage-based-routing-v2
+  timeout: 10
+
+litellm_settings:
+  callbacks: custom_callbacks.proxy_handler_instance
@@ -386,7 +386,6 @@ async def user_api_key_auth(
                 parent_otel_span=parent_otel_span,
             )
         #### ELSE ####
-
         ## CHECK PASS-THROUGH ENDPOINTS ##
         if pass_through_endpoints is not None:
             for endpoint in pass_through_endpoints:
@@ -1,66 +1,10 @@
 from litellm.integrations.custom_logger import CustomLogger
-import litellm
 
 
-# This file includes the custom callbacks for LiteLLM Proxy
-# Once defined, these can be passed in proxy_config.yaml
 class MyCustomHandler(CustomLogger):
-    def log_pre_api_call(self, model, messages, kwargs):
-        print(f"Pre-API Call") # noqa
-
-    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
-        print(f"Post-API Call") # noqa
-
-    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Stream") # noqa
-
-    def log_success_event(self, kwargs, response_obj, start_time, end_time):
-        print("On Success") # noqa
-
-    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Failure") # noqa
-
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"ishaan async_log_success_event") # noqa
-        # log: key, user, model, prompt, response, tokens, cost
-        # Access kwargs passed to litellm.completion()
-        model = kwargs.get("model", None)
-        messages = kwargs.get("messages", None)
-        user = kwargs.get("user", None)
-
-        # Access litellm_params passed to litellm.completion(), example access `metadata`
-        litellm_params = kwargs.get("litellm_params", {})
-        metadata = litellm_params.get(
-            "metadata", {}
-        )  # headers passed to LiteLLM proxy, can be found here
-
-        return
-
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        try:
-            print(f"On Async Failure !") # noqa
-            print("\nkwargs", kwargs) # noqa
-            # Access kwargs passed to litellm.completion()
-            model = kwargs.get("model", None)
-            messages = kwargs.get("messages", None)
-            user = kwargs.get("user", None)
-
-            # Access litellm_params passed to litellm.completion(), example access `metadata`
-            litellm_params = kwargs.get("litellm_params", {})
-            metadata = litellm_params.get(
-                "metadata", {}
-            )  # headers passed to LiteLLM proxy, can be found here
-
-            # Acess Exceptions & Traceback
-            exception_event = kwargs.get("exception", None)
-            traceback_event = kwargs.get("traceback_exception", None)
-
-            # Calculate cost using litellm.completion_cost()
-        except Exception as e:
-            print(f"Exception: {e}") # noqa
+        # print("Call failed")
+        pass
 
 
 proxy_handler_instance = MyCustomHandler()
-
-# Set litellm.callbacks = [proxy_handler_instance] on the proxy
-# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
@@ -6183,6 +6183,64 @@ async def delete_end_user(
     pass
 
 
+@router.get(
+    "/customer/list",
+    tags=["Customer Management"],
+    dependencies=[Depends(user_api_key_auth)],
+    response_model=List[LiteLLM_EndUserTable],
+)
+@router.get(
+    "/end_user/list",
+    tags=["Customer Management"],
+    include_in_schema=False,
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def list_team(
+    http_request: Request,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [Admin-only] List all available customers
+
+    ```
+    curl --location --request GET 'http://0.0.0.0:4000/customer/list' \
+        --header 'Authorization: Bearer sk-1234'
+    ```
+    """
+    from litellm.proxy.proxy_server import (
+        _duration_in_seconds,
+        create_audit_log_for_update,
+        litellm_proxy_admin_name,
+        prisma_client,
+    )
+
+    if (
+        user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
+        and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
+    ):
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "Admin-only endpoint. Your user role={}".format(
+                    user_api_key_dict.user_role
+                )
+            },
+        )
+
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    response = await prisma_client.db.litellm_endusertable.find_many()
+
+    returned_response: List[LiteLLM_EndUserTable] = []
+    for item in response:
+        returned_response.append(LiteLLM_EndUserTable(**item.model_dump()))
+    return returned_response
+
+
 async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
     if premium_user is not True:
         return
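Aside (not part of the diff): a small client-side sketch for the new admin-only endpoint, using `requests`. The URL and key are placeholders matching the docstring's curl example, and the printed field names are illustrative rather than guaranteed by the response model.

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},  # admin key for a locally running proxy
    timeout=10,
)
resp.raise_for_status()
for customer in resp.json():
    # illustrative fields; inspect the returned objects for the full schema
    print(customer.get("user_id"), customer.get("spend"))
```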
@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
@@ -783,6 +784,10 @@ class Router:
                 }
             )
 
+            logging_obj: Optional[LiteLLMLogging] = kwargs.get(
+                "litellm_logging_obj", None
+            )
+
             rpm_semaphore = self._get_client(
                 deployment=deployment,
                 kwargs=kwargs,
@@ -797,11 +802,13 @@ class Router:
                 - If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
                 """
                 await self.async_routing_strategy_pre_call_checks(
-                    deployment=deployment
+                    deployment=deployment, logging_obj=logging_obj
                 )
                 response = await _response
             else:
-                await self.async_routing_strategy_pre_call_checks(deployment=deployment)
+                await self.async_routing_strategy_pre_call_checks(
+                    deployment=deployment, logging_obj=logging_obj
+                )
                 response = await _response
 
             ## CHECK CONTENT FILTER ERROR ##
@@ -3860,7 +3867,9 @@ class Router:
             if isinstance(_callback, CustomLogger):
                 response = _callback.pre_call_check(deployment)
 
-    async def async_routing_strategy_pre_call_checks(self, deployment: dict):
+    async def async_routing_strategy_pre_call_checks(
+        self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
+    ):
         """
         For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.
 
@@ -3875,8 +3884,22 @@ class Router:
         for _callback in litellm.callbacks:
             if isinstance(_callback, CustomLogger):
                 try:
-                    response = await _callback.async_pre_call_check(deployment)
+                    _ = await _callback.async_pre_call_check(deployment)
                 except litellm.RateLimitError as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     self._set_cooldown_deployments(
                         exception_status=e.status_code,
                         original_exception=e,
@@ -3885,6 +3908,20 @@ class Router:
                     )
                     raise e
                 except Exception as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     raise e
 
     def _generate_model_id(self, model_group: str, litellm_params: dict):
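Aside (not part of the diff): the router changes above address "log rejected requests" (#5498) by handing the request's logging object into the pre-call checks, logging the failure, and then re-raising. A standalone sketch of that flow, with a hypothetical `SketchLogger` standing in for litellm's `Logging` object:

```python
import asyncio
import threading
import time
import traceback
from typing import Optional


class SketchLogger:
    """Hypothetical stand-in for litellm's Logging object, for illustration only."""

    async def async_failure_handler(self, exception, traceback_exception, end_time):
        print(f"[async] logged rejected request: {exception}")

    def failure_handler(self, exception, traceback_exception):
        print(f"[sync] logged rejected request: {exception}")


async def pre_call_check(logging_obj: Optional[SketchLogger]) -> None:
    try:
        raise RuntimeError("rate limit exceeded")  # simulate a failing pre-call check
    except Exception as e:
        if logging_obj is not None:
            # fire-and-forget async log plus a sync logging thread,
            # mirroring the router change above, then re-raise
            asyncio.create_task(
                logging_obj.async_failure_handler(
                    exception=e,
                    traceback_exception=traceback.format_exc(),
                    end_time=time.time(),
                )
            )
            threading.Thread(
                target=logging_obj.failure_handler,
                args=(e, traceback.format_exc()),
            ).start()
        raise e


# asyncio.run(pre_call_check(SketchLogger()))  # raises after scheduling the failure logs
```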
@@ -222,6 +222,94 @@ async def test_anthropic_api_prompt_caching_basic():
     )
 
 
+@pytest.mark.asyncio()
+async def test_anthropic_api_prompt_caching_with_content_str():
+    from litellm.llms.prompt_templates.factory import anthropic_messages_pt
+
+    system_message = [
+        {
+            "role": "system",
+            "content": "Here is the full text of a complex legal agreement",
+            "cache_control": {"type": "ephemeral"},
+        },
+    ]
+    translated_system_message = litellm.AnthropicConfig().translate_system_message(
+        messages=system_message
+    )
+
+    assert translated_system_message == [
+        # System Message
+        {
+            "type": "text",
+            "text": "Here is the full text of a complex legal agreement",
+            "cache_control": {"type": "ephemeral"},
+        }
+    ]
+    user_messages = [
+        # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+        {
+            "role": "user",
+            "content": "What are the key terms and conditions in this agreement?",
+            "cache_control": {"type": "ephemeral"},
+        },
+        {
+            "role": "assistant",
+            "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+        },
+        # The final turn is marked with cache-control, for continuing in followups.
+        {
+            "role": "user",
+            "content": "What are the key terms and conditions in this agreement?",
+            "cache_control": {"type": "ephemeral"},
+        },
+    ]
+
+    translated_messages = anthropic_messages_pt(
+        messages=user_messages,
+        model="claude-3-5-sonnet-20240620",
+        llm_provider="anthropic",
+    )
+
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What are the key terms and conditions in this agreement?",
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+                }
+            ],
+        },
+        # The final turn is marked with cache-control, for continuing in followups.
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What are the key terms and conditions in this agreement?",
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ],
+        },
+    ]
+
+    assert len(translated_messages) == len(expected_messages)
+    for idx, i in enumerate(translated_messages):
+        assert (
+            i == expected_messages[idx]
+        ), "Error on idx={}. Got={}, Expected={}".format(idx, i, expected_messages[idx])
+
+
 @pytest.mark.asyncio()
 async def test_anthropic_api_prompt_caching_no_headers():
     litellm.set_verbose = True
@@ -616,8 +616,8 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
         aws_region_name=aws_region_name,
         aws_web_identity_token=aws_web_identity_token,
         aws_role_name=aws_role_name,
-        aws_session_name="my-test-session",
-        aws_sts_endpoint="https://sts-fips.us-west-2.amazonaws.com",
+        aws_session_name="cross-region-test",
+        aws_sts_endpoint="https://sts-fips.us-east-2.amazonaws.com",
         aws_bedrock_runtime_endpoint="https://bedrock-runtime-fips.us-west-2.amazonaws.com",
     )
     # Add any assertions here to check the response
@@ -3,6 +3,8 @@ from typing import Any, Dict, Iterable, List, Optional, Union
 from pydantic import BaseModel, validator
 from typing_extensions import Literal, Required, TypedDict
 
+from .openai import ChatCompletionCachedContent
+
 
 class AnthropicMessagesToolChoice(TypedDict, total=False):
     type: Required[Literal["auto", "any", "tool"]]
@@ -18,7 +20,7 @@ class AnthropicMessagesTool(TypedDict, total=False):
 class AnthropicMessagesTextParam(TypedDict, total=False):
     type: Literal["text"]
     text: str
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
 
 
 class AnthropicMessagesToolUseParam(TypedDict):
@@ -58,7 +60,7 @@ class AnthropicImageParamSource(TypedDict):
 class AnthropicMessagesImageParam(TypedDict, total=False):
     type: Literal["image"]
     source: AnthropicImageParamSource
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
 
 
 class AnthropicMessagesToolResultContent(TypedDict):
@@ -97,7 +99,7 @@ class AnthropicMetadata(TypedDict, total=False):
 class AnthropicSystemMessageContent(TypedDict, total=False):
     type: str
     text: str
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
 
 
 class AnthropicMessagesRequest(TypedDict, total=False):
@@ -354,14 +354,18 @@ class ChatCompletionImageObject(TypedDict):
     image_url: ChatCompletionImageUrlObject
 
 
-class ChatCompletionUserMessage(TypedDict):
+class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
     content: Union[
         str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
     ]
 
 
-class ChatCompletionAssistantMessage(TypedDict, total=False):
+class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
     role: Required[Literal["assistant"]]
     content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
     name: Optional[str]
@@ -369,6 +373,10 @@ class ChatCompletionAssistantMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
 
 
+class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
 class ChatCompletionToolMessage(TypedDict):
     role: Literal["tool"]
     content: str
@@ -381,12 +389,16 @@ class ChatCompletionFunctionMessage(TypedDict):
     name: str
 
 
-class ChatCompletionSystemMessage(TypedDict, total=False):
+class OpenAIChatCompletionSystemMessage(TypedDict, total=False):
     role: Required[Literal["system"]]
     content: Required[Union[str, List]]
     name: str
 
 
+class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
 AllMessageValues = Union[
     ChatCompletionUserMessage,
     ChatCompletionAssistantMessage,
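Aside (not part of the diff): a small sketch of what the widened message types allow. It assumes a litellm build that includes these TypedDicts; the new part is carrying `cache_control` on a message whose content is a plain string, matching the "support 'cache_control' param for content when it is a string" change in this commit.

```python
from litellm.types.llms.openai import (
    ChatCompletionSystemMessage,
    ChatCompletionUserMessage,
)

# String content plus message-level cache_control, now representable in the types.
system_msg = ChatCompletionSystemMessage(
    role="system",
    content="Here is the full text of a complex legal agreement",
    cache_control={"type": "ephemeral"},
)
user_msg = ChatCompletionUserMessage(
    role="user",
    content="What are the key terms and conditions in this agreement?",
    cache_control={"type": "ephemeral"},
)
print(system_msg, user_msg)
```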
@@ -8547,11 +8547,6 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
             raise e
 
     def handle_huggingface_chunk(self, chunk):
@@ -8595,11 +8590,6 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
            raise e
 
     def handle_ai21_chunk(self, chunk):  # fake streaming
@@ -8826,11 +8816,6 @@ class CustomStreamWrapper:
                 "usage": usage,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
             raise e
 
     def handle_azure_text_completion_chunk(self, chunk):