LiteLLM Minor fixes + improvements (08/04/2024) (#5505)

* Minor IAM AWS OIDC Improvements (#5246)

* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.

* AWS IAM: Include an inline policy, to help reduce misuse of overly permissive IAM roles.
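A minimal sketch of the idea, assuming boto3 and illustrative role/token values (the policy shown here is simplified, not the exact policy added in this commit): the web-identity call is made once, scoped by an inline session policy, and the resulting temporary credentials are reused for every region.

```python
# Sketch: scope an OIDC-assumed role with an inline session policy and reuse
# the temporary credentials across regions (illustrative values only).
import json

import boto3

INLINE_POLICY = json.dumps(
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "BedrockLiteLLM",
                "Effect": "Allow",
                "Action": [
                    "bedrock:InvokeModel",
                    "bedrock:InvokeModelWithResponseStream",
                ],
                "Resource": "*",
            }
        ],
    }
)

sts = boto3.client("sts")
creds = sts.assume_role_with_web_identity(
    RoleArn="arn:aws:iam::111122223333:role/litellm-bedrock",  # hypothetical role
    RoleSessionName="litellm-session",
    WebIdentityToken=open("/var/run/secrets/oidc-token").read(),  # hypothetical path
    DurationSeconds=3600,
    Policy=INLINE_POLICY,  # can only narrow what the role itself already allows
)["Credentials"]

# The same temporary credentials are valid in any region, so one STS call is enough.
for region in ("us-east-1", "us-west-2"):
    bedrock = boto3.client(
        "bedrock-runtime",
        region_name=region,
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )
```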

* (test_bedrock_completion.py): Ensure we are testing the cross-region AWS OIDC flow.

* fix(router.py): log rejected requests

Fixes https://github.com/BerriAI/litellm/issues/5498
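As a rough illustration of what this enables (the handler name below is an assumption, not part of the change): a failure callback registered via `litellm.callbacks` should now also fire for requests the router rejects in its pre-call checks, e.g. on `RateLimitError`.

```python
# Sketch: a custom failure callback that should now also see requests the
# router rejects during pre-call checks, per this fix.
import litellm
from litellm.integrations.custom_logger import CustomLogger


class RejectedRequestLogger(CustomLogger):  # hypothetical handler name
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # kwargs carries the exception and traceback for failed/rejected calls
        exception = kwargs.get("exception")
        print(f"router rejected/failed request: {exception}")


litellm.callbacks = [RejectedRequestLogger()]
```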

* refactor: don't use verbose_logger.exception, if exception is raised

The user might already have handling for this exception, but alerting systems in prod will still surface it as an unhandled error.

* fix(datadog.py): support setting datadog source as an env var

Fixes https://github.com/BerriAI/litellm/issues/5508

* docs(logging.md): add dd_source to datadog docs
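For reference, a small sketch of how the source can now be controlled per deployment (values are examples): set `DD_SOURCE` before starting the proxy, and the Datadog logger falls back to `litellm` when it is unset.

```python
# Sketch: resolve the Datadog source from the environment, defaulting to
# "litellm" when DD_SOURCE is not set (example values only).
import os

os.environ.setdefault("DD_SOURCE", "litellm_dev")  # e.g. differentiate dev vs. prod

ddsource = os.getenv("DD_SOURCE", "litellm")
print(ddsource)  # -> "litellm_dev"
```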

* fix(proxy_server.py): expose `/customer/list` endpoint for showing all customers
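A usage sketch against a locally running proxy (base URL and key are the placeholders from the endpoint docstring); the route is admin-only and returns the customer (end-user) table.

```python
# Sketch: query the new admin-only /customer/list endpoint on a local proxy
# (base URL and API key are placeholders).
import requests

resp = requests.get(
    "http://0.0.0.0:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
resp.raise_for_status()
for customer in resp.json():
    print(customer)
```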

* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)

* feat(anthropic.py): support 'cache_control' param for content when it is a string
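A sketch of the message shape this enables, mirroring the test added below: a system message whose `content` is a plain string can now carry `cache_control` directly, and it is translated into an Anthropic text block with the same `cache_control`.

```python
# Sketch: cache_control on a message whose content is a plain string
# (mirrors the translate_system_message test added in this commit).
import litellm

system_message = [
    {
        "role": "system",
        "content": "Here is the full text of a complex legal agreement",
        "cache_control": {"type": "ephemeral"},
    },
]

translated = litellm.AnthropicConfig().translate_system_message(messages=system_message)
# -> [{"type": "text",
#      "text": "Here is the full text of a complex legal agreement",
#      "cache_control": {"type": "ephemeral"}}]
print(translated)
```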

* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)

This reverts commit 3fac0349c2.

* refactor: ci/cd run again

---------

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Krish Dholakia 2024-09-04 22:16:55 -07:00 committed by GitHub
parent cdc312d51d
commit 1e7e538261
24 changed files with 383 additions and 247 deletions

View file

@@ -1,12 +1,12 @@
 repos:
   - repo: local
     hooks:
-      - id: mypy
-        name: mypy
-        entry: python3 -m mypy --ignore-missing-imports
-        language: system
-        types: [python]
-        files: ^litellm/
+      # - id: mypy
+      #   name: mypy
+      #   entry: python3 -m mypy --ignore-missing-imports
+      #   language: system
+      #   types: [python]
+      #   files: ^litellm/
       - id: isort
         name: isort
         entry: isort

View file

@@ -1426,6 +1426,7 @@ litellm_settings:
 ```shell
 DD_API_KEY="5f2d0f310***********" # your datadog API Key
 DD_SITE="us5.datadoghq.com" # your datadog base url
+DD_SOURCE="litellm_dev" # [OPTIONAL] your datadog source. use to differentiate dev vs. prod deployments
 ```
 **Step 3**: Start the proxy, make a test request

View file

@@ -2039,10 +2039,7 @@ class DualCache(BaseCache):
             return result
         except Exception as e:
-            verbose_logger.exception(
-                f"LiteLLM Cache: Excepton async add_cache: {str(e)}"
-            )
-            raise e
+            raise e  # don't log if exception is raised

     async def async_set_cache_sadd(
         self, key, value: List, local_only: bool = False, **kwargs
@@ -2069,10 +2066,7 @@ class DualCache(BaseCache):
             return None
         except Exception as e:
-            verbose_logger.exception(
-                "LiteLLM Cache: Excepton async set_cache_sadd: {}".format(str(e))
-            )
-            raise e
+            raise e  # don't log, if exception is raised

     def flush_cache(self):
         if self.in_memory_cache is not None:
@@ -2543,7 +2537,6 @@ class Cache:
             self.cache.set_cache(cache_key, cached_data, **kwargs)
         except Exception as e:
             verbose_logger.exception(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-            pass

     async def async_add_cache(self, result, *args, **kwargs):
         """

View file

@@ -235,10 +235,7 @@ class BraintrustLogger(CustomLogger):
         except httpx.HTTPStatusError as e:
             raise Exception(e.response.text)
         except Exception as e:
-            verbose_logger.exception(
-                "Error logging to braintrust - Exception received - {}".format(str(e))
-            )
-            raise e
+            raise e  # don't use verbose_logger.exception, if exception is raised

     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
@@ -360,10 +357,7 @@
         except httpx.HTTPStatusError as e:
             raise Exception(e.response.text)
         except Exception as e:
-            verbose_logger.exception(
-                "Error logging to braintrust - Exception received - {}".format(str(e))
-            )
-            raise e
+            raise e  # don't use verbose_logger.exception, if exception is raised

     def log_failure_event(self, kwargs, response_obj, start_time, end_time):
         return super().log_failure_event(kwargs, response_obj, start_time, end_time)

View file

@@ -1,11 +1,17 @@
 #### What this does ####
 # On success + failure, log events to Datadog
-import dotenv, os
-import requests  # type: ignore
+import datetime
+import os
+import subprocess
+import sys
 import traceback
-import datetime, subprocess, sys
-import litellm, uuid
+import uuid
+
+import dotenv
+import requests  # type: ignore
+
+import litellm
 from litellm._logging import print_verbose, verbose_logger
@@ -57,9 +63,9 @@ class DataDogLogger:
     ):
         try:
             # Define DataDog client
-            from datadog_api_client.v2.api.logs_api import LogsApi
             from datadog_api_client.v2 import ApiClient
-            from datadog_api_client.v2.models import HTTPLogItem, HTTPLog
+            from datadog_api_client.v2.api.logs_api import LogsApi
+            from datadog_api_client.v2.models import HTTPLog, HTTPLogItem

             verbose_logger.debug(
                 f"datadog Logging - Enters logging function for model {kwargs}"
@@ -131,7 +137,7 @@ class DataDogLogger:
             body = HTTPLog(
                 [
                     HTTPLogItem(
-                        ddsource="litellm",
+                        ddsource=os.getenv("DD_SOURCE", "litellm"),
                         message=payload,
                         service="litellm-server",
                     ),

View file

@@ -228,6 +228,54 @@ class AnthropicConfig:

         return False

+    def translate_system_message(
+        self, messages: List[AllMessageValues]
+    ) -> List[AnthropicSystemMessageContent]:
+        system_prompt_indices = []
+        anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
+        for idx, message in enumerate(messages):
+            if message["role"] == "system":
+                valid_content: bool = False
+                system_message_block = ChatCompletionSystemMessage(**message)
+                if isinstance(system_message_block["content"], str):
+                    anthropic_system_message_content = AnthropicSystemMessageContent(
+                        type="text",
+                        text=system_message_block["content"],
+                    )
+                    if "cache_control" in system_message_block:
+                        anthropic_system_message_content["cache_control"] = (
+                            system_message_block["cache_control"]
+                        )
+                    anthropic_system_message_list.append(
+                        anthropic_system_message_content
+                    )
+                    valid_content = True
+                elif isinstance(message["content"], list):
+                    for _content in message["content"]:
+                        anthropic_system_message_content = (
+                            AnthropicSystemMessageContent(
+                                type=_content.get("type"),
+                                text=_content.get("text"),
+                            )
+                        )
+                        if "cache_control" in _content:
+                            anthropic_system_message_content["cache_control"] = (
+                                _content["cache_control"]
+                            )
+                        anthropic_system_message_list.append(
+                            anthropic_system_message_content
+                        )
+                    valid_content = True
+
+                if valid_content:
+                    system_prompt_indices.append(idx)
+
+        if len(system_prompt_indices) > 0:
+            for idx in reversed(system_prompt_indices):
+                messages.pop(idx)
+
+        return anthropic_system_message_list
+
     ### FOR [BETA] `/v1/messages` endpoint support
     def translatable_anthropic_params(self) -> List:
@@ -314,7 +362,7 @@ class AnthropicConfig:
             new_messages.append(user_message)

         if len(new_user_content_list) > 0:
-            new_messages.append({"role": "user", "content": new_user_content_list})
+            new_messages.append({"role": "user", "content": new_user_content_list})  # type: ignore

         if len(tool_message_list) > 0:
             new_messages.extend(tool_message_list)
@@ -940,45 +988,11 @@ class AnthropicChatCompletion(BaseLLM):
                 )
             else:
                 # Separate system prompt from rest of message
-                system_prompt_indices = []
-                system_prompt = ""
-                anthropic_system_message_list = None
-                for idx, message in enumerate(messages):
-                    if message["role"] == "system":
-                        valid_content: bool = False
-                        if isinstance(message["content"], str):
-                            system_prompt += message["content"]
-                            valid_content = True
-                        elif isinstance(message["content"], list):
-                            for _content in message["content"]:
-                                anthropic_system_message_content = (
-                                    AnthropicSystemMessageContent(
-                                        type=_content.get("type"),
-                                        text=_content.get("text"),
-                                    )
-                                )
-                                if "cache_control" in _content:
-                                    anthropic_system_message_content["cache_control"] = (
-                                        _content["cache_control"]
-                                    )
-                                if anthropic_system_message_list is None:
-                                    anthropic_system_message_list = []
-                                anthropic_system_message_list.append(
-                                    anthropic_system_message_content
-                                )
-                                valid_content = True
-
-                        if valid_content:
-                            system_prompt_indices.append(idx)
-
-                if len(system_prompt_indices) > 0:
-                    for idx in reversed(system_prompt_indices):
-                        messages.pop(idx)
-
-                if len(system_prompt) > 0:
-                    optional_params["system"] = system_prompt
-
+                anthropic_system_message_list = AnthropicConfig().translate_system_message(
+                    messages=messages
+                )
                 # Handling anthropic API Prompt Caching
-                if anthropic_system_message_list is not None:
+                if len(anthropic_system_message_list) > 0:
                     optional_params["system"] = anthropic_system_message_list

                 # Format rest of message according to anthropic guidelines
                 try:
@@ -986,15 +1000,10 @@
                         model=model, messages=messages, custom_llm_provider="anthropic"
                     )
                 except Exception as e:
-                    verbose_logger.exception(
-                        "litellm.llms.anthropic.chat.py::completion() - Exception occurred - {}\nReceived Messages: {}".format(
-                            str(e), messages
-                        )
-                    )
                     raise AnthropicError(
                         status_code=400,
                         message="{}\nReceived Messages={}".format(str(e), messages),
-                    )
+                    )  # don't use verbose_logger.exception, if exception is raised

         ## Load Config
         config = litellm.AnthropicConfig.get_config()

View file

@@ -119,8 +119,6 @@ class BaseAWSLLM(BaseLLM):
                 "aws_web_identity_token": aws_web_identity_token,
                 "aws_role_name": aws_role_name,
                 "aws_session_name": aws_session_name,
-                "aws_region_name": aws_region_name,
-                "aws_sts_endpoint": sts_endpoint,
             }
         )

@@ -147,6 +145,7 @@ class BaseAWSLLM(BaseLLM):
                 RoleSessionName=aws_session_name,
                 WebIdentityToken=oidc_token,
                 DurationSeconds=3600,
+                Policy='{"Version":"2012-10-17","Statement":[{"Sid":"BedrockLiteLLM","Effect":"Allow","Action":["bedrock:InvokeModel","bedrock:InvokeModelWithResponseStream"],"Resource":"*","Condition":{"Bool":{"aws:SecureTransport":"true"},"StringLike":{"aws:UserAgent":"litellm/*"}}}]}',
             )

             iam_creds_dict = {
@@ -164,6 +163,11 @@
                 ttl=3600 - 60,
             )

+            if sts_response["PackedPolicySize"] > 75:
+                verbose_logger.warning(
+                    f"The policy size is greater than 75% of the allowed size, PackedPolicySize: {sts_response['PackedPolicySize']}"
+                )
+
             session = boto3.Session(**iam_creds_dict)

             iam_creds = session.get_credentials()

View file

@@ -423,13 +423,7 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
         async for transformed_chunk in streamwrapper:
             yield transformed_chunk
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
-                str(e)
-            )
-        )
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised


 async def ollama_acompletion(
@@ -498,12 +492,7 @@ async def ollama_acompletion(
         )
         return model_response
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
-                str(e)
-            )
-        )
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised


 async def ollama_aembeddings(

View file

@@ -583,8 +583,4 @@ async def ollama_acompletion(
         )
         return model_response
     except Exception as e:
-        verbose_logger.exception(
-            "LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
-        )
-        raise e
+        raise e  # don't use verbose_logger.exception, if exception is raised

View file

@@ -168,9 +168,6 @@ def completion(
             choices_list.append(choice_obj)
         model_response.choices = choices_list  # type: ignore
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
-        )
         raise PalmError(
             message=traceback.format_exc(), status_code=response.status_code
         )

View file

@@ -564,12 +564,9 @@ class PredibaseChatCompletion(BaseLLM):
             for exception in litellm.LITELLM_EXCEPTION_TYPES:
                 if isinstance(e, exception):
                     raise e
-            verbose_logger.exception(
-                "litellm.llms.predibase.py::async_completion() - Exception occurred - {}".format(
-                    str(e)
-                )
-            )
-            raise PredibaseError(status_code=500, message="{}".format(str(e)))
+            raise PredibaseError(
+                status_code=500, message="{}".format(str(e))
+            )  # don't use verbose_logger.exception, if exception is raised
         return self.process_response(
             model=model,
             response=response,

View file

@@ -27,10 +27,13 @@ from litellm.types.completion import (
 from litellm.types.llms.anthropic import *
 from litellm.types.llms.bedrock import MessageBlock as BedrockMessageBlock
 from litellm.types.llms.openai import (
+    AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionAssistantToolCall,
     ChatCompletionFunctionMessage,
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolMessage,
+    ChatCompletionUserMessage,
 )
 from litellm.types.utils import GenericImageParsingChunk
@@ -493,10 +496,9 @@ def hf_chat_template(model: str, messages: list, chat_template: Optional[Any] = None):
         return rendered_text
     except Exception as e:
-        verbose_logger.exception(
-            "Error rendering huggingface chat template - {}".format(str(e))
-        )
-        raise Exception(f"Error rendering template - {str(e)}")
+        raise Exception(
+            f"Error rendering template - {str(e)}"
+        )  # don't use verbose_logger.exception, if exception is raised


 # Anthropic template
@@ -1171,7 +1173,9 @@ def convert_to_gemini_tool_call_result(
     return _part


-def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResultParam:
+def convert_to_anthropic_tool_result(
+    message: Union[dict, ChatCompletionToolMessage, ChatCompletionFunctionMessage]
+) -> AnthropicMessagesToolResultParam:
     """
     OpenAI message with a tool result looks like:
     {
@@ -1215,7 +1219,7 @@ def convert_to_anthropic_tool_result(
         return anthropic_tool_result
     if message["role"] == "function":
         content = message.get("content")  # type: ignore
-        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
+        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())  # type: ignore
         anthropic_tool_result = AnthropicMessagesToolResultParam(
             type="tool_result", tool_use_id=tool_call_id, content=content
         )
@@ -1230,7 +1234,7 @@ def convert_to_anthropic_tool_result(
 def convert_function_to_anthropic_tool_invoke(
-    function_call,
+    function_call: Union[dict, ChatCompletionToolCallFunctionChunk],
 ) -> List[AnthropicMessagesToolUseParam]:
     try:
         anthropic_tool_invoke = [
@@ -1247,7 +1251,7 @@ def convert_function_to_anthropic_tool_invoke(
 def convert_to_anthropic_tool_invoke(
-    tool_calls: list,
+    tool_calls: List[ChatCompletionAssistantToolCall],
 ) -> List[AnthropicMessagesToolUseParam]:
     """
     OpenAI tool invokes:
@@ -1307,17 +1311,19 @@ def add_cache_control_to_content(
     anthropic_content_element: Union[
         dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam
     ],
-    orignal_content_element: dict,
+    orignal_content_element: Union[dict, AllMessageValues],
 ):
-    if "cache_control" in orignal_content_element:
-        anthropic_content_element["cache_control"] = orignal_content_element[
-            "cache_control"
-        ]
+    cache_control_param = orignal_content_element.get("cache_control")
+
+    if cache_control_param is not None and isinstance(cache_control_param, dict):
+        transformed_param = ChatCompletionCachedContent(**cache_control_param)  # type: ignore
+
+        anthropic_content_element["cache_control"] = transformed_param

     return anthropic_content_element


 def anthropic_messages_pt(
-    messages: list,
+    messages: List[AllMessageValues],
     model: str,
     llm_provider: str,
 ) -> List[
@@ -1348,10 +1354,21 @@ def anthropic_messages_pt(
     while msg_i < len(messages):
         user_content: List[AnthropicMessagesUserMessageValues] = []
         init_msg_i = msg_i
+        if isinstance(messages[msg_i], BaseModel):
+            messages[msg_i] = dict(messages[msg_i])  # type: ignore
         ## MERGE CONSECUTIVE USER CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
-            if isinstance(messages[msg_i]["content"], list):
-                for m in messages[msg_i]["content"]:
+            user_message_types_block: Union[
+                ChatCompletionToolMessage,
+                ChatCompletionUserMessage,
+                ChatCompletionFunctionMessage,
+            ] = messages[
+                msg_i
+            ]  # type: ignore
+            if user_message_types_block["content"] and isinstance(
+                user_message_types_block["content"], list
+            ):
+                for m in user_message_types_block["content"]:
                     if m.get("type", "") == "image_url":
                         image_chunk = convert_to_anthropic_image_obj(
                             m["image_url"]["url"]
@@ -1382,15 +1399,24 @@
                         )
                         user_content.append(anthropic_content_element)
             elif (
-                messages[msg_i]["role"] == "tool"
-                or messages[msg_i]["role"] == "function"
+                user_message_types_block["role"] == "tool"
+                or user_message_types_block["role"] == "function"
             ):
                 # OpenAI's tool message content will always be a string
-                user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
-            else:
                 user_content.append(
-                    {"type": "text", "text": messages[msg_i]["content"]}
+                    convert_to_anthropic_tool_result(user_message_types_block)
                 )
+            elif isinstance(user_message_types_block["content"], str):
+                _anthropic_content_text_element: AnthropicMessagesTextParam = {
+                    "type": "text",
+                    "text": user_message_types_block["content"],
+                }
+                anthropic_content_element = add_cache_control_to_content(
+                    anthropic_content_element=_anthropic_content_text_element,
+                    orignal_content_element=user_message_types_block,
+                )
+                user_content.append(anthropic_content_element)

             msg_i += 1
@@ -1400,10 +1426,11 @@
         assistant_content: List[AnthropicMessagesAssistantMessageValues] = []
         ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
-            if "content" in messages[msg_i] and isinstance(
-                messages[msg_i]["content"], list
+            assistant_content_block: ChatCompletionAssistantMessage = messages[msg_i]  # type: ignore
+            if "content" in assistant_content_block and isinstance(
+                assistant_content_block["content"], list
             ):
-                for m in messages[msg_i]["content"]:
+                for m in assistant_content_block["content"]:
                     # handle text
                     if (
                         m.get("type", "") == "text" and len(m.get("text", "")) > 0
@@ -1417,35 +1444,37 @@
                         )
                         assistant_content.append(anthropic_message)
             elif (
-                "content" in messages[msg_i]
-                and isinstance(messages[msg_i]["content"], str)
-                and len(messages[msg_i]["content"])
-                > 0  # don't pass empty text blocks. anthropic api raises errors.
+                "content" in assistant_content_block
+                and isinstance(assistant_content_block["content"], str)
+                and assistant_content_block[
+                    "content"
+                ]  # don't pass empty text blocks. anthropic api raises errors.
             ):
                 _anthropic_text_content_element = {
                     "type": "text",
-                    "text": messages[msg_i]["content"],
+                    "text": assistant_content_block["content"],
                 }

                 anthropic_content_element = add_cache_control_to_content(
                     anthropic_content_element=_anthropic_text_content_element,
-                    orignal_content_element=messages[msg_i],
+                    orignal_content_element=assistant_content_block,
                 )
                 assistant_content.append(anthropic_content_element)

-            if messages[msg_i].get(
-                "tool_calls", []
+            assistant_tool_calls = assistant_content_block.get("tool_calls")
+            if (
+                assistant_tool_calls is not None
             ):  # support assistant tool invoke conversion
                 assistant_content.extend(
-                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
+                    convert_to_anthropic_tool_invoke(assistant_tool_calls)
                 )

-            if messages[msg_i].get("function_call"):
+            assistant_function_call = assistant_content_block.get("function_call")
+            if assistant_function_call is not None:
                 assistant_content.extend(
-                    convert_function_to_anthropic_tool_invoke(
-                        messages[msg_i]["function_call"]
-                    )
+                    convert_function_to_anthropic_tool_invoke(assistant_function_call)
                 )

             msg_i += 1

View file

@@ -491,14 +491,9 @@ class CodestralTextCompletion(BaseLLM):
                 message="HTTPStatusError - {}".format(e.response.text),
             )
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.llms.text_completion_codestral.py::async_completion() - Exception occurred - {}".format(
-                    str(e)
-                )
-            )
             raise TextCompletionCodestralError(
                 status_code=500, message="{}".format(str(e))
-            )
+            )  # don't use verbose_logger.exception, if exception is raised
         return self.process_text_completion_response(
             model=model,
             response=response,

View file

@@ -445,9 +445,6 @@ async def acompletion(
         )  # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
         return response
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.main.py::acompletion() - Exception occurred - {}".format(str(e))
-        )
         custom_llm_provider = custom_llm_provider or "openai"
         raise exception_type(
             model=model,
@@ -616,9 +613,6 @@ def mock_completion(
     except Exception as e:
         if isinstance(e, openai.APIError):
             raise e
-        verbose_logger.exception(
-            "litellm.mock_completion(): Exception occured - {}".format(str(e))
-        )
         raise Exception("Mock completion response failed")
@@ -5125,9 +5119,6 @@ async def ahealth_check(
         response = {}  # args like remaining ratelimit etc.
         return response
     except Exception as e:
-        verbose_logger.exception(
-            "litellm.ahealth_check(): Exception occured - {}".format(str(e))
-        )
         stack_trace = traceback.format_exc()
         if isinstance(stack_trace, str):
             stack_trace = stack_trace[:1000]

View file

@@ -1,6 +1,16 @@
 model_list:
-  - model_name: "whisper"
+  - model_name: gpt-4o-mini-2024-07-18
     litellm_params:
-      model: "azure/azure-whisper"
-      api_key: os.environ/AZURE_EUROPE_API_KEY
-      api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
+      api_key: API_KEY
+      model: openai/gpt-4o-mini-2024-07-18
+      rpm: 0
+      tpm: 100
+
+router_settings:
+  num_retries: 0
+  routing_strategy: usage-based-routing-v2
+  timeout: 10
+
+litellm_settings:
+  callbacks: custom_callbacks.proxy_handler_instance

View file

@@ -386,7 +386,6 @@ async def user_api_key_auth(
                 parent_otel_span=parent_otel_span,
             )
         #### ELSE ####
-        ## CHECK PASS-THROUGH ENDPOINTS ##
         if pass_through_endpoints is not None:
             for endpoint in pass_through_endpoints:

View file

@@ -1,66 +1,10 @@
 from litellm.integrations.custom_logger import CustomLogger
-import litellm

-# This file includes the custom callbacks for LiteLLM Proxy
-# Once defined, these can be passed in proxy_config.yaml

 class MyCustomHandler(CustomLogger):
-    def log_pre_api_call(self, model, messages, kwargs):
-        print(f"Pre-API Call")  # noqa
-
-    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
-        print(f"Post-API Call")  # noqa
-
-    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Stream")  # noqa
-
-    def log_success_event(self, kwargs, response_obj, start_time, end_time):
-        print("On Success")  # noqa
-
-    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Failure")  # noqa
-
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"ishaan async_log_success_event")  # noqa
-        # log: key, user, model, prompt, response, tokens, cost
-        # Access kwargs passed to litellm.completion()
-        model = kwargs.get("model", None)
-        messages = kwargs.get("messages", None)
-        user = kwargs.get("user", None)
-
-        # Access litellm_params passed to litellm.completion(), example access `metadata`
-        litellm_params = kwargs.get("litellm_params", {})
-        metadata = litellm_params.get(
-            "metadata", {}
-        )  # headers passed to LiteLLM proxy, can be found here
-        return
-
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        try:
-            print(f"On Async Failure !")  # noqa
-            print("\nkwargs", kwargs)  # noqa
-            # Access kwargs passed to litellm.completion()
-            model = kwargs.get("model", None)
-            messages = kwargs.get("messages", None)
-            user = kwargs.get("user", None)
-
-            # Access litellm_params passed to litellm.completion(), example access `metadata`
-            litellm_params = kwargs.get("litellm_params", {})
-            metadata = litellm_params.get(
-                "metadata", {}
-            )  # headers passed to LiteLLM proxy, can be found here
-
-            # Acess Exceptions & Traceback
-            exception_event = kwargs.get("exception", None)
-            traceback_event = kwargs.get("traceback_exception", None)
-
-            # Calculate cost using litellm.completion_cost()
-        except Exception as e:
-            print(f"Exception: {e}")  # noqa
+        # print("Call failed")
+        pass


 proxy_handler_instance = MyCustomHandler()
-
-# Set litellm.callbacks = [proxy_handler_instance] on the proxy
-# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy

View file

@@ -6183,6 +6183,64 @@ async def delete_end_user(
     pass


+@router.get(
+    "/customer/list",
+    tags=["Customer Management"],
+    dependencies=[Depends(user_api_key_auth)],
+    response_model=List[LiteLLM_EndUserTable],
+)
+@router.get(
+    "/end_user/list",
+    tags=["Customer Management"],
+    include_in_schema=False,
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def list_team(
+    http_request: Request,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [Admin-only] List all available customers
+
+    ```
+    curl --location --request GET 'http://0.0.0.0:4000/customer/list' \
+        --header 'Authorization: Bearer sk-1234'
+    ```
+    """
+    from litellm.proxy.proxy_server import (
+        _duration_in_seconds,
+        create_audit_log_for_update,
+        litellm_proxy_admin_name,
+        prisma_client,
+    )
+
+    if (
+        user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
+        and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
+    ):
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "Admin-only endpoint. Your user role={}".format(
+                    user_api_key_dict.user_role
+                )
+            },
+        )
+
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    response = await prisma_client.db.litellm_endusertable.find_many()
+
+    returned_response: List[LiteLLM_EndUserTable] = []
+    for item in response:
+        returned_response.append(LiteLLM_EndUserTable(**item.model_dump()))
+    return returned_response
+
+
 async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
     if premium_user is not True:
         return

View file

@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
@@ -783,6 +784,10 @@ class Router:
                 }
             )

+            logging_obj: Optional[LiteLLMLogging] = kwargs.get(
+                "litellm_logging_obj", None
+            )
+
             rpm_semaphore = self._get_client(
                 deployment=deployment,
                 kwargs=kwargs,
@@ -797,11 +802,13 @@
                     - If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
                     """
                     await self.async_routing_strategy_pre_call_checks(
-                        deployment=deployment
+                        deployment=deployment, logging_obj=logging_obj
                     )
                     response = await _response
             else:
-                await self.async_routing_strategy_pre_call_checks(deployment=deployment)
+                await self.async_routing_strategy_pre_call_checks(
+                    deployment=deployment, logging_obj=logging_obj
+                )
                 response = await _response

             ## CHECK CONTENT FILTER ERROR ##
@@ -3860,7 +3867,9 @@
             if isinstance(_callback, CustomLogger):
                 response = _callback.pre_call_check(deployment)

-    async def async_routing_strategy_pre_call_checks(self, deployment: dict):
+    async def async_routing_strategy_pre_call_checks(
+        self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
+    ):
         """
         For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.
@@ -3875,8 +3884,22 @@
         for _callback in litellm.callbacks:
             if isinstance(_callback, CustomLogger):
                 try:
-                    response = await _callback.async_pre_call_check(deployment)
+                    _ = await _callback.async_pre_call_check(deployment)
                 except litellm.RateLimitError as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     self._set_cooldown_deployments(
                         exception_status=e.status_code,
                         original_exception=e,
@@ -3885,6 +3908,20 @@
                     )
                     raise e
                 except Exception as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     raise e

     def _generate_model_id(self, model_group: str, litellm_params: dict):

View file

@@ -222,6 +222,94 @@ async def test_anthropic_api_prompt_caching_basic():
     )


+@pytest.mark.asyncio()
+async def test_anthropic_api_prompt_caching_with_content_str():
+    from litellm.llms.prompt_templates.factory import anthropic_messages_pt
+
+    system_message = [
+        {
+            "role": "system",
+            "content": "Here is the full text of a complex legal agreement",
+            "cache_control": {"type": "ephemeral"},
+        },
+    ]
+    translated_system_message = litellm.AnthropicConfig().translate_system_message(
+        messages=system_message
+    )
+
+    assert translated_system_message == [
+        # System Message
+        {
+            "type": "text",
+            "text": "Here is the full text of a complex legal agreement",
+            "cache_control": {"type": "ephemeral"},
+        }
+    ]
+
+    user_messages = [
+        # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+        {
+            "role": "user",
+            "content": "What are the key terms and conditions in this agreement?",
+            "cache_control": {"type": "ephemeral"},
+        },
+        {
+            "role": "assistant",
+            "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+        },
+        # The final turn is marked with cache-control, for continuing in followups.
+        {
+            "role": "user",
+            "content": "What are the key terms and conditions in this agreement?",
+            "cache_control": {"type": "ephemeral"},
+        },
+    ]
+
+    translated_messages = anthropic_messages_pt(
+        messages=user_messages,
+        model="claude-3-5-sonnet-20240620",
+        llm_provider="anthropic",
+    )
+
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What are the key terms and conditions in this agreement?",
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+                }
+            ],
+        },
+        # The final turn is marked with cache-control, for continuing in followups.
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What are the key terms and conditions in this agreement?",
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ],
+        },
+    ]
+
+    assert len(translated_messages) == len(expected_messages)
+    for idx, i in enumerate(translated_messages):
+        assert (
+            i == expected_messages[idx]
+        ), "Error on idx={}. Got={}, Expected={}".format(idx, i, expected_messages[idx])
+
+
 @pytest.mark.asyncio()
 async def test_anthropic_api_prompt_caching_no_headers():
     litellm.set_verbose = True

View file

@@ -616,8 +616,8 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
             aws_region_name=aws_region_name,
             aws_web_identity_token=aws_web_identity_token,
             aws_role_name=aws_role_name,
-            aws_session_name="my-test-session",
-            aws_sts_endpoint="https://sts-fips.us-west-2.amazonaws.com",
+            aws_session_name="cross-region-test",
+            aws_sts_endpoint="https://sts-fips.us-east-2.amazonaws.com",
             aws_bedrock_runtime_endpoint="https://bedrock-runtime-fips.us-west-2.amazonaws.com",
         )
         # Add any assertions here to check the response

View file

@@ -3,6 +3,8 @@ from typing import Any, Dict, Iterable, List, Optional, Union
 from pydantic import BaseModel, validator
 from typing_extensions import Literal, Required, TypedDict

+from .openai import ChatCompletionCachedContent
+

 class AnthropicMessagesToolChoice(TypedDict, total=False):
     type: Required[Literal["auto", "any", "tool"]]
@@ -18,7 +20,7 @@ class AnthropicMessagesTool(TypedDict, total=False):
 class AnthropicMessagesTextParam(TypedDict, total=False):
     type: Literal["text"]
     text: str
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesToolUseParam(TypedDict):
@@ -58,7 +60,7 @@ class AnthropicImageParamSource(TypedDict):
 class AnthropicMessagesImageParam(TypedDict, total=False):
     type: Literal["image"]
     source: AnthropicImageParamSource
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesToolResultContent(TypedDict):
@@ -97,7 +99,7 @@ class AnthropicMetadata(TypedDict, total=False):
 class AnthropicSystemMessageContent(TypedDict, total=False):
     type: str
     text: str
-    cache_control: Optional[dict]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesRequest(TypedDict, total=False):

View file

@@ -354,14 +354,18 @@ class ChatCompletionImageObject(TypedDict):
     image_url: ChatCompletionImageUrlObject


-class ChatCompletionUserMessage(TypedDict):
+class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
     content: Union[
         str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
     ]


-class ChatCompletionAssistantMessage(TypedDict, total=False):
+class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
     role: Required[Literal["assistant"]]
     content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
     name: Optional[str]
@@ -369,6 +373,10 @@ class ChatCompletionAssistantMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]


+class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
 class ChatCompletionToolMessage(TypedDict):
     role: Literal["tool"]
     content: str
@@ -381,12 +389,16 @@ class ChatCompletionFunctionMessage(TypedDict):
     name: str


-class ChatCompletionSystemMessage(TypedDict, total=False):
+class OpenAIChatCompletionSystemMessage(TypedDict, total=False):
     role: Required[Literal["system"]]
     content: Required[Union[str, List]]
     name: str


+class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
 AllMessageValues = Union[
     ChatCompletionUserMessage,
     ChatCompletionAssistantMessage,

View file

@@ -8547,11 +8547,6 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
             raise e

     def handle_huggingface_chunk(self, chunk):
@@ -8595,11 +8590,6 @@
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
             raise e

     def handle_ai21_chunk(self, chunk):  # fake streaming
@@ -8826,11 +8816,6 @@
                 "usage": usage,
             }
         except Exception as e:
-            verbose_logger.exception(
-                "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
-                    str(e)
-                )
-            )
             raise e

     def handle_azure_text_completion_chunk(self, chunk):