forked from phoenix/litellm-mirror
LiteLLM Minor fixes + improvements (08/04/2024) (#5505)
* Minor IAM AWS OIDC Improvements (#5246)
* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.
* AWS IAM: Include an inline policy, to help reduce misuse of overly permissive IAM roles.
* (test_bedrock_completion.py): Ensure we are testing cross AWS region OIDC flow.
* fix(router.py): log rejected requests
Fixes https://github.com/BerriAI/litellm/issues/5498
* refactor: don't use verbose_logger.exception if the exception is re-raised
The caller may already have handling for it; logging it as well makes production alerting systems report it as an unhandled error.
* fix(datadog.py): support setting datadog source as an env var
Fixes https://github.com/BerriAI/litellm/issues/5508
* docs(logging.md): add dd_source to datadog docs
* fix(proxy_server.py): expose `/customer/list` endpoint for showing all customers
* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)
* feat(anthropic.py): support 'cache_control' param for content when it is a string
* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)
This reverts commit 3fac0349c2.
* refactor: ci/cd run again
---------
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Parent: cdc312d51d
Commit: 1e7e538261
24 changed files with 383 additions and 247 deletions
@@ -1,12 +1,12 @@
repos:
- repo: local
hooks:
- id: mypy
name: mypy
entry: python3 -m mypy --ignore-missing-imports
language: system
types: [python]
files: ^litellm/
# - id: mypy
# name: mypy
# entry: python3 -m mypy --ignore-missing-imports
# language: system
# types: [python]
# files: ^litellm/
- id: isort
name: isort
entry: isort

@@ -1426,6 +1426,7 @@ litellm_settings:
```shell
DD_API_KEY="5f2d0f310***********" # your datadog API Key
DD_SITE="us5.datadoghq.com" # your datadog base url
DD_SOURCE="litellm_dev" # [OPTIONAL] your datadog source. use to differentiate dev vs. prod deployments
```

**Step 3**: Start the proxy, make a test request
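
A minimal sketch of such a test request, sent through a locally running proxy with the Datadog callback enabled; the base URL, key, and model name below are placeholders, not values from this commit:

```python
# Hypothetical test request against a local LiteLLM proxy; assumes the proxy
# was started with the Datadog callback and DD_API_KEY/DD_SITE/DD_SOURCE set.
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # placeholder proxy key
    base_url="http://0.0.0.0:4000",  # default LiteLLM proxy address
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # any model_name defined in the proxy config
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```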
@@ -2039,10 +2039,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
verbose_logger.exception(
f"LiteLLM Cache: Excepton async add_cache: {str(e)}"
)
raise e
raise e # don't log if exception is raised

async def async_set_cache_sadd(
self, key, value: List, local_only: bool = False, **kwargs

@@ -2069,10 +2066,7 @@ class DualCache(BaseCache):
return None
except Exception as e:
verbose_logger.exception(
"LiteLLM Cache: Excepton async set_cache_sadd: {}".format(str(e))
)
raise e
raise e # don't log, if exception is raised

def flush_cache(self):
if self.in_memory_cache is not None:

@@ -2543,7 +2537,6 @@ class Cache:
self.cache.set_cache(cache_key, cached_data, **kwargs)
except Exception as e:
verbose_logger.exception(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
pass

async def async_add_cache(self, result, *args, **kwargs):
"""

@@ -235,10 +235,7 @@ class BraintrustLogger(CustomLogger):
except httpx.HTTPStatusError as e:
raise Exception(e.response.text)
except Exception as e:
verbose_logger.exception(
"Error logging to braintrust - Exception received - {}".format(str(e))
)
raise e
raise e # don't use verbose_logger.exception, if exception is raised

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
verbose_logger.debug("REACHES BRAINTRUST SUCCESS")

@@ -360,10 +357,7 @@ class BraintrustLogger(CustomLogger):
except httpx.HTTPStatusError as e:
raise Exception(e.response.text)
except Exception as e:
verbose_logger.exception(
"Error logging to braintrust - Exception received - {}".format(str(e))
)
raise e
raise e # don't use verbose_logger.exception, if exception is raised

def log_failure_event(self, kwargs, response_obj, start_time, end_time):
return super().log_failure_event(kwargs, response_obj, start_time, end_time)

@@ -1,11 +1,17 @@
#### What this does ####
# On success + failure, log events to Datadog

import dotenv, os
import requests # type: ignore
import datetime
import os
import subprocess
import sys
import traceback
import datetime, subprocess, sys
import litellm, uuid
import uuid

import dotenv
import requests # type: ignore

import litellm
from litellm._logging import print_verbose, verbose_logger

@@ -57,9 +63,9 @@ class DataDogLogger:
):
try:
# Define DataDog client
from datadog_api_client.v2.api.logs_api import LogsApi
from datadog_api_client.v2 import ApiClient
from datadog_api_client.v2.models import HTTPLogItem, HTTPLog
from datadog_api_client.v2.api.logs_api import LogsApi
from datadog_api_client.v2.models import HTTPLog, HTTPLogItem

verbose_logger.debug(
f"datadog Logging - Enters logging function for model {kwargs}"

@@ -131,7 +137,7 @@ class DataDogLogger:
body = HTTPLog(
[
HTTPLogItem(
ddsource="litellm",
ddsource=os.getenv("DD_SOURCE", "litellm"),
message=payload,
service="litellm-server",
),
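
The hunk above replaces the hard-coded `ddsource` with an environment lookup. A rough, self-contained sketch of the same pattern with the `datadog_api_client` package follows; the message payload and service name are illustrative, and DD_API_KEY/DD_SITE are assumed to be set so the default configuration can authenticate:

```python
# Illustrative sketch of the ddsource-from-env pattern shown above; not the
# actual DataDogLogger implementation. Assumes DD_API_KEY and DD_SITE are set.
import os

from datadog_api_client.v2 import ApiClient, Configuration
from datadog_api_client.v2.api.logs_api import LogsApi
from datadog_api_client.v2.models import HTTPLog, HTTPLogItem

body = HTTPLog(
    [
        HTTPLogItem(
            ddsource=os.getenv("DD_SOURCE", "litellm"),  # dev vs. prod source tag
            message='{"event": "test log from litellm"}',  # placeholder payload
            service="litellm-server",
        )
    ]
)

with ApiClient(Configuration()) as api_client:
    LogsApi(api_client).submit_log(body=body)
```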
@@ -228,6 +228,54 @@ class AnthropicConfig:
return False

def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)

anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True

if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)

return anthropic_system_message_list

### FOR [BETA] `/v1/messages` endpoint support

def translatable_anthropic_params(self) -> List:

@@ -314,7 +362,7 @@ class AnthropicConfig:
new_messages.append(user_message)

if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list})
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore

if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)

@@ -940,45 +988,11 @@ class AnthropicChatCompletion(BaseLLM):
)
else:
# Separate system prompt from rest of message
system_prompt_indices = []
system_prompt = ""
anthropic_system_message_list = None
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
if isinstance(message["content"], str):
system_prompt += message["content"]
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)

if anthropic_system_message_list is None:
anthropic_system_message_list = []
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True

if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
if len(system_prompt) > 0:
optional_params["system"] = system_prompt

anthropic_system_message_list = AnthropicConfig().translate_system_message(
messages=messages
)
# Handling anthropic API Prompt Caching
if anthropic_system_message_list is not None:
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:

@@ -986,15 +1000,10 @@ class AnthropicChatCompletion(BaseLLM):
model=model, messages=messages, custom_llm_provider="anthropic"
)
except Exception as e:
verbose_logger.exception(
"litellm.llms.anthropic.chat.py::completion() - Exception occurred - {}\nReceived Messages: {}".format(
str(e), messages
)
)
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
)
) # don't use verbose_logger.exception, if exception is raised

## Load Config
config = litellm.AnthropicConfig.get_config()
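
The new `translate_system_message` helper above is what lets a plain string system message carry a `cache_control` block. A hedged usage sketch, mirroring the shape of the `test_anthropic_api_prompt_caching_with_content_str` test added later in this diff (the prompt text is a placeholder):

```python
# Sketch of the "cache_control on string content" behaviour added above,
# modeled on the unit test in this commit; prompt text is a placeholder.
import litellm

messages = [
    {
        "role": "system",
        "content": "Here is the full text of a complex legal agreement",
        "cache_control": {"type": "ephemeral"},  # now accepted on string content
    }
]

translated = litellm.AnthropicConfig().translate_system_message(messages=messages)
# translated -> [{"type": "text",
#                 "text": "Here is the full text of a complex legal agreement",
#                 "cache_control": {"type": "ephemeral"}}]
# note: the system message is popped from `messages` as part of the translation
```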
@@ -119,8 +119,6 @@ class BaseAWSLLM(BaseLLM):
"aws_web_identity_token": aws_web_identity_token,
"aws_role_name": aws_role_name,
"aws_session_name": aws_session_name,
"aws_region_name": aws_region_name,
"aws_sts_endpoint": sts_endpoint,
}
)

@@ -147,6 +145,7 @@ class BaseAWSLLM(BaseLLM):
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
Policy='{"Version":"2012-10-17","Statement":[{"Sid":"BedrockLiteLLM","Effect":"Allow","Action":["bedrock:InvokeModel","bedrock:InvokeModelWithResponseStream"],"Resource":"*","Condition":{"Bool":{"aws:SecureTransport":"true"},"StringLike":{"aws:UserAgent":"litellm/*"}}}]}',
)

iam_creds_dict = {

@@ -164,6 +163,11 @@ class BaseAWSLLM(BaseLLM):
ttl=3600 - 60,
)

if sts_response["PackedPolicySize"] > 75:
verbose_logger.warning(
f"The policy size is greater than 75% of the allowed size, PackedPolicySize: {sts_response['PackedPolicySize']}"
)

session = boto3.Session(**iam_creds_dict)

iam_creds = session.get_credentials()
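
Outside of LiteLLM, the pattern the hunks above rely on, assuming a role through an OIDC web-identity token with an inline session policy and watching `PackedPolicySize`, can be sketched directly with boto3. The role ARN, session name, and token path below are placeholders, not values from this commit:

```python
# Hedged sketch of the STS call used above: temporary, region-agnostic
# credentials scoped down to Bedrock invocation via an inline session policy.
# RoleArn, RoleSessionName, and the token path are placeholders.
import boto3

INLINE_POLICY = (
    '{"Version":"2012-10-17","Statement":[{"Sid":"BedrockLiteLLM","Effect":"Allow",'
    '"Action":["bedrock:InvokeModel","bedrock:InvokeModelWithResponseStream"],'
    '"Resource":"*"}]}'
)

with open("/var/run/secrets/oidc/token") as f:  # placeholder token location
    oidc_token = f.read()

sts = boto3.client("sts", region_name="us-west-2")
resp = sts.assume_role_with_web_identity(
    RoleArn="arn:aws:iam::123456789012:role/litellm-bedrock",  # placeholder
    RoleSessionName="litellm-session",
    WebIdentityToken=oidc_token,
    DurationSeconds=3600,
    Policy=INLINE_POLICY,
)

# PackedPolicySize is reported as a percentage of the allowed packed size;
# the hunk above warns once it crosses 75.
if resp.get("PackedPolicySize", 0) > 75:
    print("inline session policy is close to the size limit")

creds = resp["Credentials"]
session = boto3.Session(
    aws_access_key_id=creds["AccessKeyId"],
    aws_secret_access_key=creds["SecretAccessKey"],
    aws_session_token=creds["SessionToken"],
)
```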
@@ -423,13 +423,7 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
async for transformed_chunk in streamwrapper:
yield transformed_chunk
except Exception as e:
verbose_logger.exception(
"LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
str(e)
)
)

raise e
raise e # don't use verbose_logger.exception, if exception is raised


async def ollama_acompletion(

@@ -498,12 +492,7 @@ async def ollama_acompletion(
)
return model_response
except Exception as e:
verbose_logger.exception(
"LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
str(e)
)
)
raise e
raise e # don't use verbose_logger.exception, if exception is raised


async def ollama_aembeddings(

@@ -583,8 +583,4 @@ async def ollama_acompletion(
)
return model_response
except Exception as e:
verbose_logger.exception(
"LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
)

raise e
raise e # don't use verbose_logger.exception, if exception is raised

@@ -168,9 +168,6 @@ def completion(
choices_list.append(choice_obj)
model_response.choices = choices_list # type: ignore
except Exception as e:
verbose_logger.exception(
"litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
)
raise PalmError(
message=traceback.format_exc(), status_code=response.status_code
)

@@ -564,12 +564,9 @@ class PredibaseChatCompletion(BaseLLM):
for exception in litellm.LITELLM_EXCEPTION_TYPES:
if isinstance(e, exception):
raise e
verbose_logger.exception(
"litellm.llms.predibase.py::async_completion() - Exception occurred - {}".format(
str(e)
)
)
raise PredibaseError(status_code=500, message="{}".format(str(e)))
raise PredibaseError(
status_code=500, message="{}".format(str(e))
) # don't use verbose_logger.exception, if exception is raised
return self.process_response(
model=model,
response=response,

@@ -27,10 +27,13 @@ from litellm.types.completion import (
from litellm.types.llms.anthropic import *
from litellm.types.llms.bedrock import MessageBlock as BedrockMessageBlock
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionFunctionMessage,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolMessage,
ChatCompletionUserMessage,
)
from litellm.types.utils import GenericImageParsingChunk

@@ -493,10 +496,9 @@ def hf_chat_template(model: str, messages: list, chat_template: Optional[Any] =
return rendered_text
except Exception as e:
verbose_logger.exception(
"Error rendering huggingface chat template - {}".format(str(e))
)
raise Exception(f"Error rendering template - {str(e)}")
raise Exception(
f"Error rendering template - {str(e)}"
) # don't use verbose_logger.exception, if exception is raised


# Anthropic template

@@ -1171,7 +1173,9 @@ def convert_to_gemini_tool_call_result(
return _part


def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResultParam:
def convert_to_anthropic_tool_result(
message: Union[dict, ChatCompletionToolMessage, ChatCompletionFunctionMessage]
) -> AnthropicMessagesToolResultParam:
"""
OpenAI message with a tool result looks like:
{

@@ -1215,7 +1219,7 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
return anthropic_tool_result
if message["role"] == "function":
content = message.get("content") # type: ignore
tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
tool_call_id = message.get("tool_call_id") or str(uuid.uuid4()) # type: ignore
anthropic_tool_result = AnthropicMessagesToolResultParam(
type="tool_result", tool_use_id=tool_call_id, content=content
)

@@ -1230,7 +1234,7 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu


def convert_function_to_anthropic_tool_invoke(
function_call,
function_call: Union[dict, ChatCompletionToolCallFunctionChunk],
) -> List[AnthropicMessagesToolUseParam]:
try:
anthropic_tool_invoke = [

@@ -1247,7 +1251,7 @@ def convert_function_to_anthropic_tool_invoke(


def convert_to_anthropic_tool_invoke(
tool_calls: list,
tool_calls: List[ChatCompletionAssistantToolCall],
) -> List[AnthropicMessagesToolUseParam]:
"""
OpenAI tool invokes:

@@ -1307,17 +1311,19 @@ def add_cache_control_to_content(
anthropic_content_element: Union[
dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam
],
orignal_content_element: dict,
orignal_content_element: Union[dict, AllMessageValues],
):
if "cache_control" in orignal_content_element:
anthropic_content_element["cache_control"] = orignal_content_element[
"cache_control"
]
cache_control_param = orignal_content_element.get("cache_control")
if cache_control_param is not None and isinstance(cache_control_param, dict):
transformed_param = ChatCompletionCachedContent(**cache_control_param) # type: ignore

anthropic_content_element["cache_control"] = transformed_param

return anthropic_content_element


def anthropic_messages_pt(
messages: list,
messages: List[AllMessageValues],
model: str,
llm_provider: str,
) -> List[

@@ -1348,10 +1354,21 @@ def anthropic_messages_pt(
while msg_i < len(messages):
user_content: List[AnthropicMessagesUserMessageValues] = []
init_msg_i = msg_i
if isinstance(messages[msg_i], BaseModel):
messages[msg_i] = dict(messages[msg_i]) # type: ignore
## MERGE CONSECUTIVE USER CONTENT ##
while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
if isinstance(messages[msg_i]["content"], list):
for m in messages[msg_i]["content"]:
user_message_types_block: Union[
ChatCompletionToolMessage,
ChatCompletionUserMessage,
ChatCompletionFunctionMessage,
] = messages[
msg_i
] # type: ignore
if user_message_types_block["content"] and isinstance(
user_message_types_block["content"], list
):
for m in user_message_types_block["content"]:
if m.get("type", "") == "image_url":
image_chunk = convert_to_anthropic_image_obj(
m["image_url"]["url"]

@@ -1382,15 +1399,24 @@ def anthropic_messages_pt(
)
user_content.append(anthropic_content_element)
elif (
messages[msg_i]["role"] == "tool"
or messages[msg_i]["role"] == "function"
user_message_types_block["role"] == "tool"
or user_message_types_block["role"] == "function"
):
# OpenAI's tool message content will always be a string
user_content.append(convert_to_anthropic_tool_result(messages[msg_i]))
else:
user_content.append(
{"type": "text", "text": messages[msg_i]["content"]}
convert_to_anthropic_tool_result(user_message_types_block)
)
elif isinstance(user_message_types_block["content"], str):
_anthropic_content_text_element: AnthropicMessagesTextParam = {
"type": "text",
"text": user_message_types_block["content"],
}
anthropic_content_element = add_cache_control_to_content(
anthropic_content_element=_anthropic_content_text_element,
orignal_content_element=user_message_types_block,
)

user_content.append(anthropic_content_element)

msg_i += 1

@@ -1400,10 +1426,11 @@ def anthropic_messages_pt(
assistant_content: List[AnthropicMessagesAssistantMessageValues] = []
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
if "content" in messages[msg_i] and isinstance(
messages[msg_i]["content"], list
assistant_content_block: ChatCompletionAssistantMessage = messages[msg_i] # type: ignore
if "content" in assistant_content_block and isinstance(
assistant_content_block["content"], list
):
for m in messages[msg_i]["content"]:
for m in assistant_content_block["content"]:
# handle text
if (
m.get("type", "") == "text" and len(m.get("text", "")) > 0

@@ -1417,35 +1444,37 @@ def anthropic_messages_pt(
)
assistant_content.append(anthropic_message)
elif (
"content" in messages[msg_i]
and isinstance(messages[msg_i]["content"], str)
and len(messages[msg_i]["content"])
> 0 # don't pass empty text blocks. anthropic api raises errors.
"content" in assistant_content_block
and isinstance(assistant_content_block["content"], str)
and assistant_content_block[
"content"
] # don't pass empty text blocks. anthropic api raises errors.
):

_anthropic_text_content_element = {
"type": "text",
"text": messages[msg_i]["content"],
"text": assistant_content_block["content"],
}

anthropic_content_element = add_cache_control_to_content(
anthropic_content_element=_anthropic_text_content_element,
orignal_content_element=messages[msg_i],
orignal_content_element=assistant_content_block,
)
assistant_content.append(anthropic_content_element)

if messages[msg_i].get(
"tool_calls", []
assistant_tool_calls = assistant_content_block.get("tool_calls")
if (
assistant_tool_calls is not None
): # support assistant tool invoke conversion
assistant_content.extend(
convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
convert_to_anthropic_tool_invoke(assistant_tool_calls)
)

if messages[msg_i].get("function_call"):
assistant_function_call = assistant_content_block.get("function_call")

if assistant_function_call is not None:
assistant_content.extend(
convert_function_to_anthropic_tool_invoke(
messages[msg_i]["function_call"]
)
convert_function_to_anthropic_tool_invoke(assistant_function_call)
)

msg_i += 1

@@ -491,14 +491,9 @@ class CodestralTextCompletion(BaseLLM):
message="HTTPStatusError - {}".format(e.response.text),
)
except Exception as e:
verbose_logger.exception(
"litellm.llms.text_completion_codestral.py::async_completion() - Exception occurred - {}".format(
str(e)
)
)
raise TextCompletionCodestralError(
status_code=500, message="{}".format(str(e))
)
) # don't use verbose_logger.exception, if exception is raised
return self.process_text_completion_response(
model=model,
response=response,

@@ -445,9 +445,6 @@ async def acompletion(
) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
return response
except Exception as e:
verbose_logger.exception(
"litellm.main.py::acompletion() - Exception occurred - {}".format(str(e))
)
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,

@@ -616,9 +613,6 @@ def mock_completion(
except Exception as e:
if isinstance(e, openai.APIError):
raise e
verbose_logger.exception(
"litellm.mock_completion(): Exception occured - {}".format(str(e))
)
raise Exception("Mock completion response failed")

@@ -5125,9 +5119,6 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc.
return response
except Exception as e:
verbose_logger.exception(
"litellm.ahealth_check(): Exception occured - {}".format(str(e))
)
stack_trace = traceback.format_exc()
if isinstance(stack_trace, str):
stack_trace = stack_trace[:1000]

@@ -1,6 +1,16 @@
model_list:
- model_name: "whisper"
litellm_params:
model: "azure/azure-whisper"
api_key: os.environ/AZURE_EUROPE_API_KEY
api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
- model_name: gpt-4o-mini-2024-07-18
litellm_params:
api_key: API_KEY
model: openai/gpt-4o-mini-2024-07-18
rpm: 0
tpm: 100

router_settings:
num_retries: 0
routing_strategy: usage-based-routing-v2
timeout: 10

litellm_settings:
callbacks: custom_callbacks.proxy_handler_instance

@@ -386,7 +386,6 @@ async def user_api_key_auth(
parent_otel_span=parent_otel_span,
)
#### ELSE ####

## CHECK PASS-THROUGH ENDPOINTS ##
if pass_through_endpoints is not None:
for endpoint in pass_through_endpoints:

@@ -1,66 +1,10 @@
from litellm.integrations.custom_logger import CustomLogger
import litellm


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call") # noqa

def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call") # noqa

def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream") # noqa

def log_success_event(self, kwargs, response_obj, start_time, end_time):
print("On Success") # noqa

def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure") # noqa

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"ishaan async_log_success_event") # noqa
# log: key, user, model, prompt, response, tokens, cost
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)

# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here

return

async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"On Async Failure !") # noqa
print("\nkwargs", kwargs) # noqa
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)

# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here

# Acess Exceptions & Traceback
exception_event = kwargs.get("exception", None)
traceback_event = kwargs.get("traceback_exception", None)

# Calculate cost using litellm.completion_cost()
except Exception as e:
print(f"Exception: {e}") # noqa
# print("Call failed")
pass


proxy_handler_instance = MyCustomHandler()

# Set litellm.callbacks = [proxy_handler_instance] on the proxy
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy

@@ -6183,6 +6183,64 @@ async def delete_end_user(
pass


@router.get(
"/customer/list",
tags=["Customer Management"],
dependencies=[Depends(user_api_key_auth)],
response_model=List[LiteLLM_EndUserTable],
)
@router.get(
"/end_user/list",
tags=["Customer Management"],
include_in_schema=False,
dependencies=[Depends(user_api_key_auth)],
)
async def list_team(
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[Admin-only] List all available customers

```
curl --location --request GET 'http://0.0.0.0:4000/customer/list' \
--header 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import (
_duration_in_seconds,
create_audit_log_for_update,
litellm_proxy_admin_name,
prisma_client,
)

if (
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
):
raise HTTPException(
status_code=401,
detail={
"error": "Admin-only endpoint. Your user role={}".format(
user_api_key_dict.user_role
)
},
)

if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)

response = await prisma_client.db.litellm_endusertable.find_many()

returned_response: List[LiteLLM_EndUserTable] = []
for item in response:
returned_response.append(LiteLLM_EndUserTable(**item.model_dump()))
return returned_response


async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
if premium_user is not True:
return
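
The new `/customer/list` route above is admin-only. A hedged sketch of calling it, using the proxy URL and key from the docstring's curl example (both placeholders):

```python
# Hedged sketch of hitting the new /customer/list endpoint; URL and admin key
# are placeholders copied from the docstring's curl example above.
import httpx

resp = httpx.get(
    "http://0.0.0.0:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},  # must be an admin key
)
resp.raise_for_status()
for customer in resp.json():  # one entry per LiteLLM_EndUserTable record
    print(customer.get("user_id"), customer.get("spend"))
```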
@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler

@@ -783,6 +784,10 @@ class Router:
}
)

logging_obj: Optional[LiteLLMLogging] = kwargs.get(
"litellm_logging_obj", None
)

rpm_semaphore = self._get_client(
deployment=deployment,
kwargs=kwargs,

@@ -797,11 +802,13 @@ class Router:
- If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
"""
await self.async_routing_strategy_pre_call_checks(
deployment=deployment
deployment=deployment, logging_obj=logging_obj
)
response = await _response
else:
await self.async_routing_strategy_pre_call_checks(deployment=deployment)
await self.async_routing_strategy_pre_call_checks(
deployment=deployment, logging_obj=logging_obj
)
response = await _response

## CHECK CONTENT FILTER ERROR ##

@@ -3860,7 +3867,9 @@ class Router:
if isinstance(_callback, CustomLogger):
response = _callback.pre_call_check(deployment)

async def async_routing_strategy_pre_call_checks(self, deployment: dict):
async def async_routing_strategy_pre_call_checks(
self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
):
"""
For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.

@@ -3875,8 +3884,22 @@ class Router:
for _callback in litellm.callbacks:
if isinstance(_callback, CustomLogger):
try:
response = await _callback.async_pre_call_check(deployment)
_ = await _callback.async_pre_call_check(deployment)
except litellm.RateLimitError as e:
## LOG FAILURE EVENT
if logging_obj is not None:
asyncio.create_task(
logging_obj.async_failure_handler(
exception=e,
traceback_exception=traceback.format_exc(),
end_time=time.time(),
)
)
## LOGGING
threading.Thread(
target=logging_obj.failure_handler,
args=(e, traceback.format_exc()),
).start() # log response
self._set_cooldown_deployments(
exception_status=e.status_code,
original_exception=e,

@@ -3885,6 +3908,20 @@ class Router:
)
raise e
except Exception as e:
## LOG FAILURE EVENT
if logging_obj is not None:
asyncio.create_task(
logging_obj.async_failure_handler(
exception=e,
traceback_exception=traceback.format_exc(),
end_time=time.time(),
)
)
## LOGGING
threading.Thread(
target=logging_obj.failure_handler,
args=(e, traceback.format_exc()),
).start() # log response
raise e

def _generate_model_id(self, model_group: str, litellm_params: dict):
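
With the router change above, requests rejected in the pre-call checks (for example by a rate limit) now reach the failure callbacks instead of disappearing silently. A hedged sketch of a callback that would observe them, modeled on the `MyCustomHandler` class shown earlier in this diff:

```python
# Sketch of observing rejected requests through a failure callback; modeled on
# the custom_callbacks.py handler in this diff, not taken from router.py itself.
import litellm
from litellm.integrations.custom_logger import CustomLogger


class RejectedRequestLogger(CustomLogger):
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        model = kwargs.get("model", None)
        exception_event = kwargs.get("exception", None)
        print(f"request rejected for model={model}: {exception_event}")  # noqa


litellm.callbacks = [RejectedRequestLogger()]
```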
@@ -222,6 +222,94 @@ async def test_anthropic_api_prompt_caching_basic():
)


@pytest.mark.asyncio()
async def test_anthropic_api_prompt_caching_with_content_str():
from litellm.llms.prompt_templates.factory import anthropic_messages_pt

system_message = [
{
"role": "system",
"content": "Here is the full text of a complex legal agreement",
"cache_control": {"type": "ephemeral"},
},
]
translated_system_message = litellm.AnthropicConfig().translate_system_message(
messages=system_message
)

assert translated_system_message == [
# System Message
{
"type": "text",
"text": "Here is the full text of a complex legal agreement",
"cache_control": {"type": "ephemeral"},
}
]
user_messages = [
# marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
{
"role": "user",
"content": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
},
{
"role": "assistant",
"content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
},
# The final turn is marked with cache-control, for continuing in followups.
{
"role": "user",
"content": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
},
]

translated_messages = anthropic_messages_pt(
messages=user_messages,
model="claude-3-5-sonnet-20240620",
llm_provider="anthropic",
)

expected_messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
}
],
},
{
"role": "assistant",
"content": [
{
"type": "text",
"text": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
}
],
},
# The final turn is marked with cache-control, for continuing in followups.
{
"role": "user",
"content": [
{
"type": "text",
"text": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
}
],
},
]

assert len(translated_messages) == len(expected_messages)
for idx, i in enumerate(translated_messages):
assert (
i == expected_messages[idx]
), "Error on idx={}. Got={}, Expected={}".format(idx, i, expected_messages[idx])


@pytest.mark.asyncio()
async def test_anthropic_api_prompt_caching_no_headers():
litellm.set_verbose = True

@@ -616,8 +616,8 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
aws_region_name=aws_region_name,
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
aws_sts_endpoint="https://sts-fips.us-west-2.amazonaws.com",
aws_session_name="cross-region-test",
aws_sts_endpoint="https://sts-fips.us-east-2.amazonaws.com",
aws_bedrock_runtime_endpoint="https://bedrock-runtime-fips.us-west-2.amazonaws.com",
)
# Add any assertions here to check the response

@@ -3,6 +3,8 @@ from typing import Any, Dict, Iterable, List, Optional, Union
from pydantic import BaseModel, validator
from typing_extensions import Literal, Required, TypedDict

from .openai import ChatCompletionCachedContent


class AnthropicMessagesToolChoice(TypedDict, total=False):
type: Required[Literal["auto", "any", "tool"]]

@@ -18,7 +20,7 @@ class AnthropicMessagesTool(TypedDict, total=False):
class AnthropicMessagesTextParam(TypedDict, total=False):
type: Literal["text"]
text: str
cache_control: Optional[dict]
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class AnthropicMessagesToolUseParam(TypedDict):

@@ -58,7 +60,7 @@ class AnthropicImageParamSource(TypedDict):
class AnthropicMessagesImageParam(TypedDict, total=False):
type: Literal["image"]
source: AnthropicImageParamSource
cache_control: Optional[dict]
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class AnthropicMessagesToolResultContent(TypedDict):

@@ -97,7 +99,7 @@ class AnthropicMetadata(TypedDict, total=False):
class AnthropicSystemMessageContent(TypedDict, total=False):
type: str
text: str
cache_control: Optional[dict]
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class AnthropicMessagesRequest(TypedDict, total=False):

@@ -354,14 +354,18 @@ class ChatCompletionImageObject(TypedDict):
image_url: ChatCompletionImageUrlObject


class ChatCompletionUserMessage(TypedDict):
class OpenAIChatCompletionUserMessage(TypedDict):
role: Literal["user"]
content: Union[
str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]


class ChatCompletionAssistantMessage(TypedDict, total=False):
class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
cache_control: ChatCompletionCachedContent


class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
role: Required[Literal["assistant"]]
content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
name: Optional[str]

@@ -369,6 +373,10 @@ class ChatCompletionAssistantMessage(TypedDict, total=False):
function_call: Optional[ChatCompletionToolCallFunctionChunk]


class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
cache_control: ChatCompletionCachedContent


class ChatCompletionToolMessage(TypedDict):
role: Literal["tool"]
content: str

@@ -381,12 +389,16 @@ class ChatCompletionFunctionMessage(TypedDict):
name: str


class ChatCompletionSystemMessage(TypedDict, total=False):
class OpenAIChatCompletionSystemMessage(TypedDict, total=False):
role: Required[Literal["system"]]
content: Required[Union[str, List]]
name: str


class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False):
cache_control: ChatCompletionCachedContent


AllMessageValues = Union[
ChatCompletionUserMessage,
ChatCompletionAssistantMessage,

@@ -8547,11 +8547,6 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
verbose_logger.exception(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
str(e)
)
)
raise e

def handle_huggingface_chunk(self, chunk):

@@ -8595,11 +8590,6 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
verbose_logger.exception(
"litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
str(e)
)
)
raise e

def handle_ai21_chunk(self, chunk): # fake streaming

@@ -8826,11 +8816,6 @@ class CustomStreamWrapper:
"usage": usage,
}
except Exception as e:
verbose_logger.exception(
"litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
str(e)
)
)
raise e

def handle_azure_text_completion_chunk(self, chunk):