Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Support caching on reasoning content + other fixes (#8973)
* fix(factory.py): pass on anthropic thinking content from assistant call
* fix(factory.py): fix anthropic messages to handle thinking blocks
  Fixes https://github.com/BerriAI/litellm/issues/8961
* fix(factory.py): fix bedrock handling for assistant content in messages
  Fixes https://github.com/BerriAI/litellm/issues/8961
* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block
  Ensures caching works for the anthropic thinking block
* fix(convert_dict_to_response.py): pass all message params to delta block
  Ensures the streaming delta also contains the reasoning content / thinking block
* test(test_prompt_factory.py): remove redundant test
  Anthropic now supports assistant as the first message
* fix(factory.py): fix linting errors
* fix: fix code qa
* test: remove falsy test
* fix(litellm_logging.py): fix str conversion
This commit is contained in: parent 4c8b4fefc9, commit 662c59adcf
11 changed files with 230 additions and 50 deletions
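The net effect, mirrored by the new cache test at the bottom of this diff, is that reasoning content now survives a cache round trip. A minimal usage sketch, assuming an Anthropic API key is configured; the model name and parameters are taken from that test rather than from any official example:

```python
import litellm
from litellm import Cache, completion

litellm.cache = Cache()  # default in-memory cache

messages = [{"role": "user", "content": "what is litellm?"}]

# First call hits the provider; the response, including reasoning content, is written to the cache.
response_1 = completion(
    model="anthropic/claude-3-7-sonnet-latest",
    messages=messages,
    thinking={"type": "enabled", "budget_tokens": 1024},
)

# An identical second call is served from the cache and still carries the reasoning fields.
response_2 = completion(
    model="anthropic/claude-3-7-sonnet-latest",
    messages=messages,
    thinking={"type": "enabled", "budget_tokens": 1024},
)

assert response_2._hidden_params["cache_hit"] is True
assert response_2.choices[0].message.reasoning_content is not None
```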
@@ -247,7 +247,6 @@ class LLMCachingHandler:
                pass
            else:
                call_type = original_function.__name__
-
                cached_result = self._convert_cached_result_to_model_response(
                    cached_result=cached_result,
                    call_type=call_type,
@@ -725,6 +724,7 @@ class LLMCachingHandler:
        """
        Sync internal method to add the result to the cache
        """
+
        new_kwargs = kwargs.copy()
        new_kwargs.update(
            convert_args_to_kwargs(
@@ -738,6 +738,7 @@ class LLMCachingHandler:
        if self._should_store_result_in_cache(
            original_function=self.original_function, kwargs=new_kwargs
        ):

            litellm.cache.add_cache(result, **new_kwargs)

            return
@@ -9,6 +9,7 @@ from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union
import litellm
from litellm._logging import verbose_logger
from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
+from litellm.types.llms.openai import ChatCompletionThinkingBlock
from litellm.types.utils import (
    ChatCompletionDeltaToolCall,
    ChatCompletionMessageToolCall,
@@ -128,12 +129,7 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
    model_response_object = ModelResponse(stream=True)
    choice_list = []
    for idx, choice in enumerate(response_object["choices"]):
-        delta = Delta(
-            content=choice["message"].get("content", None),
-            role=choice["message"]["role"],
-            function_call=choice["message"].get("function_call", None),
-            tool_calls=choice["message"].get("tool_calls", None),
-        )
+        delta = Delta(**choice["message"])
        finish_reason = choice.get("finish_reason", None)
        if finish_reason is None:
            # gpt-4 vision can return 'finish_reason' or 'finish_details'
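The streaming fix above replaces a hand-picked set of keys with a full unpack of the message dict, so fields such as reasoning_content and thinking_blocks are no longer silently dropped from streamed deltas. A self-contained sketch of the pattern; FakeDelta below is a hypothetical stand-in, not litellm's Delta class:

```python
# Hypothetical stand-in for a delta object that keeps unknown fields instead of dropping them.
class FakeDelta:
    def __init__(self, content=None, role=None, function_call=None, tool_calls=None, **extra):
        self.content = content
        self.role = role
        self.function_call = function_call
        self.tool_calls = tool_calls
        self.__dict__.update(extra)  # e.g. reasoning_content, thinking_blocks

message = {
    "role": "assistant",
    "content": "The answer is 4.",
    "reasoning_content": "2 + 2 = 4",
}

# Old pattern: only the explicitly listed keys survive.
old_delta = FakeDelta(
    content=message.get("content"),
    role=message["role"],
    function_call=message.get("function_call"),
    tool_calls=message.get("tool_calls"),
)
assert not hasattr(old_delta, "reasoning_content")

# New pattern: unpack the whole message, so reasoning fields reach the streaming delta.
new_delta = FakeDelta(**message)
assert new_delta.reasoning_content == "2 + 2 = 4"
```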
@@ -456,10 +452,19 @@ def convert_to_model_response_object( # noqa: PLR0915
                    provider_specific_fields[field] = choice["message"][field]

            # Handle reasoning models that display `reasoning_content` within `content`
-            reasoning_content, content = _parse_content_for_reasoning(
-                choice["message"].get("content")
-            )
+            if "reasoning_content" in choice["message"]:
+                reasoning_content = choice["message"]["reasoning_content"]
+                content = choice["message"]["content"]
+            else:
+                reasoning_content, content = _parse_content_for_reasoning(
+                    choice["message"].get("content")
+                )
+
+            # Handle thinking models that display `thinking_blocks` within `content`
+            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            if "thinking_blocks" in choice["message"]:
+                thinking_blocks = choice["message"]["thinking_blocks"]
+                provider_specific_fields["thinking_blocks"] = thinking_blocks

            if reasoning_content:
                provider_specific_fields["reasoning_content"] = (
@@ -474,6 +479,7 @@ def convert_to_model_response_object( # noqa: PLR0915
                audio=choice["message"].get("audio", None),
                provider_specific_fields=provider_specific_fields,
                reasoning_content=reasoning_content,
+                thinking_blocks=thinking_blocks,
            )
            finish_reason = choice.get("finish_reason", None)
            if finish_reason is None:
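With the two hunks above, a response dict may carry reasoning either as an explicit reasoning_content field (the cached Anthropic case) or inline in content, and any thinking_blocks are copied into provider_specific_fields so they survive caching. A standalone sketch of that branching; parse_content_for_reasoning here is a hypothetical <think>-tag parser standing in for litellm's _parse_content_for_reasoning:

```python
import re
from typing import List, Optional, Tuple

def parse_content_for_reasoning(content: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    # Hypothetical fallback parser: split "<think>...</think>rest" into (reasoning, rest).
    if not content:
        return None, content
    match = re.match(r"<think>(.*?)</think>(.*)", content, re.DOTALL)
    if match:
        return match.group(1).strip(), match.group(2).strip()
    return None, content

def extract_reasoning(message: dict) -> Tuple[Optional[str], Optional[str], Optional[List[dict]]]:
    if "reasoning_content" in message:
        # Cached / Anthropic-style messages carry reasoning_content explicitly.
        reasoning_content = message["reasoning_content"]
        content = message["content"]
    else:
        # Reasoning models that inline their reasoning inside content.
        reasoning_content, content = parse_content_for_reasoning(message.get("content"))
    thinking_blocks = message.get("thinking_blocks")  # preserved so signatures survive caching
    return reasoning_content, content, thinking_blocks

reasoning, text, blocks = extract_reasoning(
    {"content": "<think>2 + 2 = 4</think>The answer is 4."}
)
assert reasoning == "2 + 2 = 4" and text == "The answer is 4." and blocks is None
```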
@@ -1282,6 +1282,7 @@ def add_cache_control_to_content(
        AnthropicMessagesImageParam,
        AnthropicMessagesTextParam,
        AnthropicMessagesDocumentParam,
+        ChatCompletionThinkingBlock,
    ],
    orignal_content_element: Union[dict, AllMessageValues],
):
@@ -1454,12 +1455,23 @@ def anthropic_messages_pt( # noqa: PLR0915
                    assistant_content_block["content"], list
                ):
                    for m in assistant_content_block["content"]:
-                        # handle text
+                        # handle thinking blocks
+                        thinking_block = cast(str, m.get("thinking", ""))
+                        text_block = cast(str, m.get("text", ""))
                        if (
-                            m.get("type", "") == "text" and len(m.get("text", "")) > 0
+                            m.get("type", "") == "thinking" and len(thinking_block) > 0
+                        ): # don't pass empty text blocks. anthropic api raises errors.
+                            anthropic_message: Union[
+                                ChatCompletionThinkingBlock,
+                                AnthropicMessagesTextParam,
+                            ] = cast(ChatCompletionThinkingBlock, m)
+                            assistant_content.append(anthropic_message)
+                        # handle text
+                        elif (
+                            m.get("type", "") == "text" and len(text_block) > 0
                        ): # don't pass empty text blocks. anthropic api raises errors.
                            anthropic_message = AnthropicMessagesTextParam(
-                                type="text", text=m.get("text")
+                                type="text", text=text_block
                            )
                            _cached_message = add_cache_control_to_content(
                                anthropic_content_element=anthropic_message,
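In prompt translation, assistant content lists can now contain thinking blocks alongside text blocks: thinking blocks are forwarded unchanged (keeping their signature), while empty text blocks are still filtered out because the Anthropic API rejects them. A standalone sketch of that filtering; translate_assistant_blocks is a hypothetical helper, the real logic is inline in anthropic_messages_pt:

```python
from typing import List

def translate_assistant_blocks(blocks: List[dict]) -> List[dict]:
    out: List[dict] = []
    for m in blocks:
        thinking = m.get("thinking", "")
        text = m.get("text", "")
        if m.get("type") == "thinking" and len(thinking) > 0:
            out.append(m)  # forwarded as-is so the signature is preserved
        elif m.get("type") == "text" and len(text) > 0:
            out.append({"type": "text", "text": text})  # empty text blocks are dropped
    return out

assert translate_assistant_blocks(
    [
        {"type": "thinking", "thinking": "2 + 2 = 4", "signature": "abc"},
        {"type": "text", "text": "The answer is 4."},
        {"type": "text", "text": ""},
    ]
) == [
    {"type": "thinking", "thinking": "2 + 2 = 4", "signature": "abc"},
    {"type": "text", "text": "The answer is 4."},
]
```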
@@ -1512,6 +1524,7 @@ def anthropic_messages_pt( # noqa: PLR0915
            msg_i += 1

        if assistant_content:

            new_messages.append({"role": "assistant", "content": assistant_content})

        if msg_i == init_msg_i: # prevent infinite loops
@@ -1520,17 +1533,6 @@ def anthropic_messages_pt( # noqa: PLR0915
            model=model,
            llm_provider=llm_provider,
        )
-    if not new_messages or new_messages[0]["role"] != "user":
-        if litellm.modify_params:
-            new_messages.insert(
-                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
-            )
-        else:
-            raise Exception(
-                "Invalid first message={}. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, ".format(
-                    new_messages
-                )
-            )

    if new_messages[-1]["role"] == "assistant":
        if isinstance(new_messages[-1]["content"], str):
@@ -2924,7 +2926,14 @@ class BedrockConverseMessagesProcessor:
                        assistants_parts: List[BedrockContentBlock] = []
                        for element in _assistant_content:
                            if isinstance(element, dict):
-                                if element["type"] == "text":
+                                if element["type"] == "thinking":
+                                    thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
+                                        thinking_blocks=[
+                                            cast(ChatCompletionThinkingBlock, element)
+                                        ]
+                                    )
+                                    assistants_parts.extend(thinking_block)
+                                elif element["type"] == "text":
                                    assistants_part = BedrockContentBlock(
                                        text=element["text"]
                                    )
@@ -3157,7 +3166,14 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
                    assistants_parts: List[BedrockContentBlock] = []
                    for element in _assistant_content:
                        if isinstance(element, dict):
-                            if element["type"] == "text":
+                            if element["type"] == "thinking":
+                                thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
+                                    thinking_blocks=[
+                                        cast(ChatCompletionThinkingBlock, element)
+                                    ]
+                                )
+                                assistants_parts.extend(thinking_block)
+                            elif element["type"] == "text":
                                assistants_part = BedrockContentBlock(text=element["text"])
                                assistants_parts.append(assistants_part)
                            elif element["type"] == "image_url":
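Both Bedrock paths above route thinking blocks through translate_thinking_blocks_to_reasoning_content_blocks before handling text blocks. A rough sketch of what such a translation could look like; the Converse block shape used here (reasoningContent / reasoningText) is an assumption about the Bedrock schema, not a copy of litellm's helper:

```python
from typing import List

def thinking_blocks_to_bedrock_reasoning(thinking_blocks: List[dict]) -> List[dict]:
    # Convert OpenAI-style thinking blocks into assumed Bedrock Converse reasoning blocks.
    bedrock_blocks: List[dict] = []
    for block in thinking_blocks:
        bedrock_blocks.append(
            {
                "reasoningContent": {
                    "reasoningText": {
                        "text": block.get("thinking", ""),
                        "signature": block.get("signature", ""),
                    }
                }
            }
        )
    return bedrock_blocks
```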
@@ -361,6 +361,7 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
    type: Required[Literal["thinking"]]
    thinking: str
    signature_delta: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class OpenAIChatCompletionTextObject(TypedDict):
@@ -449,7 +450,11 @@ class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):

class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
    role: Required[Literal["assistant"]]
-    content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
+    content: Optional[
+        Union[
+            str, Iterable[Union[ChatCompletionTextObject, ChatCompletionThinkingBlock]]
+        ]
+    ]
    name: Optional[str]
    tool_calls: Optional[List[ChatCompletionAssistantToolCall]]
    function_call: Optional[ChatCompletionToolCallFunctionChunk]
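The type changes above let a thinking block carry cache_control and let assistant message content mix text and thinking blocks. A minimal sketch using the updated TypedDicts; the import path is taken from the earlier hunk, and the ephemeral cache_control dict follows Anthropic's prompt-caching format:

```python
from litellm.types.llms.openai import ChatCompletionThinkingBlock

thinking_block: ChatCompletionThinkingBlock = {
    "type": "thinking",
    "thinking": "2 + 2 = 4",
    "cache_control": {"type": "ephemeral"},  # newly allowed on thinking blocks
}

# Assistant message content may now be an iterable mixing text objects and thinking blocks.
assistant_message = {
    "role": "assistant",
    "content": [
        thinking_block,
        {"type": "text", "text": "The answer is 4."},
    ],
}
```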
@@ -1048,6 +1048,7 @@ def client(original_function): # noqa: PLR0915
            )

            if caching_handler_response.cached_result is not None:
+                verbose_logger.debug("Cache hit!")
                return caching_handler_response.cached_result

            # CHECK MAX TOKENS
2 file diffs suppressed because one or more lines are too long
@@ -855,3 +855,51 @@ def test_convert_to_model_response_object_with_empty_str():
    resp: ModelResponse = convert_to_model_response_object(**args)
    assert resp is not None
    assert resp.choices[0].message.content is not None
+
+
+def test_convert_to_model_response_object_with_thinking_content():
+    """Test that convert_to_model_response_object handles thinking content correctly."""
+
+    args = {
+        "response_object": {
+            "id": "chatcmpl-8cc87354-70f3-4a14-b71b-332e965d98d2",
+            "created": 1741057687,
+            "model": "claude-3-7-sonnet-20250219",
+            "object": "chat.completion",
+            "system_fingerprint": None,
+            "choices": [
+                {
+                    "finish_reason": "stop",
+                    "index": 0,
+                    "message": {
+                        "content": "# LiteLLM\n\nLiteLLM is an open-source library that provides a unified interface for working with various Large Language Models (LLMs). It acts as an abstraction layer that lets developers interact with multiple LLM providers through a single, consistent API.\n\n## Key features:\n\n- **Universal API**: Standardizes interactions with models from OpenAI, Anthropic, Cohere, Azure, and many other providers\n- **Simple switching**: Easily swap between different LLM providers without changing your code\n- **Routing capabilities**: Manage load balancing, fallbacks, and cost optimization\n- **Prompt templates**: Handle different model-specific prompt formats automatically\n- **Logging and observability**: Track usage, performance, and costs across providers\n\nLiteLLM is particularly useful for teams who want flexibility in their LLM infrastructure without creating custom integration code for each provider.",
+                        "role": "assistant",
+                        "tool_calls": None,
+                        "function_call": None,
+                        "reasoning_content": "The person is asking about \"litellm\" and included what appears to be a UUID or some form of identifier at the end of their message (fffffe14-7991-43d0-acd8-d3e606db31a8).\n\nLiteLLM is an open-source library/project that provides a unified interface for working with various Large Language Models (LLMs). It's essentially a lightweight package that standardizes the way developers can work with different LLM APIs like OpenAI, Anthropic, Cohere, etc. through a consistent interface.\n\nSome key features and aspects of LiteLLM:\n\n1. Unified API for multiple LLM providers (OpenAI, Anthropic, Azure, etc.)\n2. Standardized input/output formats\n3. Handles routing, fallbacks, and load balancing\n4. Provides logging and observability\n5. Can help with cost tracking across different providers\n6. Makes it easier to switch between different LLM providers\n\nThe UUID-like string they included doesn't seem directly related to the question, unless it's some form of identifier they're including for tracking purposes.",
+                        "thinking_blocks": [
+                            {
+                                "type": "thinking",
+                                "thinking": "The person is asking about \"litellm\" and included what appears to be a UUID or some form of identifier at the end of their message (fffffe14-7991-43d0-acd8-d3e606db31a8).\n\nLiteLLM is an open-source library/project that provides a unified interface for working with various Large Language Models (LLMs). It's essentially a lightweight package that standardizes the way developers can work with different LLM APIs like OpenAI, Anthropic, Cohere, etc. through a consistent interface.\n\nSome key features and aspects of LiteLLM:\n\n1. Unified API for multiple LLM providers (OpenAI, Anthropic, Azure, etc.)\n2. Standardized input/output formats\n3. Handles routing, fallbacks, and load balancing\n4. Provides logging and observability\n5. Can help with cost tracking across different providers\n6. Makes it easier to switch between different LLM providers\n\nThe UUID-like string they included doesn't seem directly related to the question, unless it's some form of identifier they're including for tracking purposes.",
+                                "signature": "ErUBCkYIARgCIkCf+r0qMSOMYkjlFERM00IxsY9I/m19dQGEF/Zv1E0AtvdZjKGnr+nr5vXUldmb/sUCgrQRH4YUyV0X3MoMrsNnEgxDqhUFcUTg1vM0CroaDEY1wKJ0Ca0EZ6S1jCIwF8ATum3xiF/mRSIIjoD6Virh0hFcOfH3Sz6Chtev9WUwwYMAVP4/hyzbrUDnsUlmKh0CfTayaXm6o63/6Kelr6pzLbErjQx2xZRnRjCypw==",
+                            }
+                        ],
+                    },
+                }
+            ],
+            "usage": {
+                "completion_tokens": 460,
+                "prompt_tokens": 65,
+                "total_tokens": 525,
+                "completion_tokens_details": None,
+                "prompt_tokens_details": {"audio_tokens": None, "cached_tokens": 0},
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+            },
+        },
+        "model_response_object": ModelResponse(),
+    }
+
+    resp: ModelResponse = convert_to_model_response_object(**args)
+    assert resp is not None
+    assert resp.choices[0].message.reasoning_content is not None
@@ -125,28 +125,6 @@ def test_anthropic_pt_formatting():
    assert anthropic_pt(messages) == expected_prompt


-def test_anthropic_messages_pt():
-    # Test case: No messages (filtered system messages only)
-    litellm.modify_params = True
-    messages = []
-    expected_messages = [{"role": "user", "content": [{"type": "text", "text": "."}]}]
-    assert (
-        anthropic_messages_pt(
-            messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
-        )
-        == expected_messages
-    )
-
-    # Test case: No messages (filtered system messages only) when modify_params is False should raise error
-    litellm.modify_params = False
-    messages = []
-    with pytest.raises(Exception) as err:
-        anthropic_messages_pt(
-            messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
-        )
-    assert "Invalid first message" in str(err.value)
-
-
def test_anthropic_messages_nested_pt():
    from litellm.types.llms.anthropic import (
        AnthopicMessagesAssistantMessageParam,
@@ -2561,3 +2561,30 @@ def test_redis_caching_multiple_namespaces():

    # request 4 without a namespace should not be cached under the same key as request 3
    assert response_4.id != response_3.id
+
+
+def test_caching_with_reasoning_content():
+    """
+    Test that reasoning content is cached
+    """
+
+    import uuid
+
+    messages = [{"role": "user", "content": f"what is litellm? {uuid.uuid4()}"}]
+    litellm.cache = Cache()
+
+    response_1 = completion(
+        model="anthropic/claude-3-7-sonnet-latest",
+        messages=messages,
+        thinking={"type": "enabled", "budget_tokens": 1024},
+    )
+
+    response_2 = completion(
+        model="anthropic/claude-3-7-sonnet-latest",
+        messages=messages,
+        thinking={"type": "enabled", "budget_tokens": 1024},
+    )
+
+    print(f"response 2: {response_2.model_dump_json(indent=4)}")
+    assert response_2._hidden_params["cache_hit"] == True
+    assert response_2.choices[0].message.reasoning_content is not None
@@ -257,6 +257,8 @@ def test_aaparallel_function_call_with_anthropic_thinking(model):
            thinking={"type": "enabled", "budget_tokens": 1024},
        ) # get a new response from the model where it can see the function response
        print("second response\n", second_response)
+
+        ## THIRD RESPONSE
    except litellm.InternalServerError as e:
        print(e)
    except litellm.RateLimitError as e: