Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Resolve Anthropic Overloaded error during stream

commit 9717d89e69 (parent 3a7d729d88)
7 changed files with 323 additions and 6 deletions
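For context on how the new kwargs get set: when Anthropic returns its Overloaded error partway through a stream, the streaming wrapper can capture the content generated so far and re-dispatch through the router's fallback path. The sketch below is illustrative only — `stream_with_mid_stream_fallback`, the `litellm.InternalServerError` match, and the retry call are assumptions, not this commit's actual call sites; the commit's consumer of these kwargs is `run_async_fallback`, shown in the diff that follows.

```python
# Hypothetical sketch (not the commit's call site): catch an Anthropic
# Overloaded failure mid-stream and hand the partial output to the
# fallback path via the two kwargs popped in run_async_fallback below.
import litellm

async def stream_with_mid_stream_fallback(router, **kwargs):
    collected = []  # content chunks already delivered before the failure
    try:
        response = await router.acompletion(stream=True, **kwargs)
        async for chunk in response:
            delta = chunk.choices[0].delta.content or ""
            collected.append(delta)
            yield delta
    except litellm.InternalServerError as exc:
        # Assumption: the Overloaded error surfaces as an
        # InternalServerError whose message contains "Overloaded".
        if "Overloaded" not in str(exc):
            raise
        kwargs["is_mid_stream_fallback"] = True
        kwargs["previous_content"] = "".join(collected)
        # Retry; the router's fallback machinery picks these kwargs up.
        fallback_response = await router.acompletion(stream=True, **kwargs)
        async for chunk in fallback_response:
            yield chunk.choices[0].delta.content or ""
```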
```diff
@@ -119,6 +119,41 @@ async def run_async_fallback(
     error_from_fallbacks = original_exception
 
+    # Handle mid-stream fallbacks by preserving already generated content
+    is_mid_stream = kwargs.pop("is_mid_stream_fallback", False)
+    previous_content = kwargs.pop("previous_content", "")
+
+    # If this is a mid-stream fallback and we have previous content, prepare messages
+    if is_mid_stream and previous_content and "messages" in kwargs:
+        messages = kwargs.get("messages", [])
+        if isinstance(messages, list) and len(messages) > 0:
+            if previous_content.strip():
+                # Check for a system message
+                system_msg_idx = None
+                for i, msg in enumerate(messages):
+                    if msg.get("role") == "system":
+                        system_msg_idx = i
+                        break
+
+                continuation_text = f"The following is the beginning of an assistant's response. Continue from where it left off: '{previous_content}'"
+
+                if system_msg_idx is not None:
+                    # Append to existing system message
+                    messages[system_msg_idx]["content"] = messages[system_msg_idx].get("content", "") + "\n\n" + continuation_text
+                else:
+                    # Add a new system message
+                    messages.insert(0, {"role": "system", "content": continuation_text})
+
+                # Update kwargs with modified messages
+                kwargs["messages"] = messages
+
+        # Add to metadata to track this was a mid-stream fallback
+        kwargs.setdefault("metadata", {}).update({
+            "is_mid_stream_fallback": True,
+            "fallback_depth": fallback_depth,
+            "previous_content_length": len(previous_content)
+        })
+
     for mg in fallback_model_group:
         if mg == original_model_group:
             continue
```
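To see what the message rewrite above actually produces, here is a standalone reproduction of that logic with made-up inputs (the messages and partial content are hypothetical):

```python
# Standalone reproduction of the message-preparation logic above,
# showing the before/after shape of `messages`.
previous_content = "The capital of France is Par"

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

continuation_text = (
    "The following is the beginning of an assistant's response. "
    f"Continue from where it left off: '{previous_content}'"
)

# Mirrors the diff: append to an existing system message if present,
# otherwise insert a new one at the front.
system_msg_idx = next(
    (i for i, m in enumerate(messages) if m.get("role") == "system"), None
)
if system_msg_idx is not None:
    messages[system_msg_idx]["content"] += "\n\n" + continuation_text
else:
    messages.insert(0, {"role": "system", "content": continuation_text})

print(messages[0]["content"])
# You are a concise assistant.
#
# The following is the beginning of an assistant's response.
# Continue from where it left off: 'The capital of France is Par'
```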
```diff
@@ -139,6 +174,10 @@ async def run_async_fallback(
             response = await litellm_router.async_function_with_fallbacks(
                 *args, **kwargs
             )
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params.setdefault("metadata", {})["mid_stream_fallback"] = True
+                # Also add to additional_headers for header propagation
+                response._hidden_params.setdefault("additional_headers", {})["x-litellm-mid-stream-fallback"] = True
             verbose_router_logger.info("Successful fallback b/w models.")
             response = add_fallback_headers_to_response(
                 response=response,
```
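On the consuming side, a caller can look for the marker set above. The sketch below assumes the proxy serializes `additional_headers` onto the HTTP response (the "header propagation" comment implies this, but the serialization itself is outside this hunk), and that the boolean renders as the string "True"; the URL and model name are placeholders:

```python
# Sketch: checking for the mid-stream-fallback marker on a proxy response.
import httpx

async def was_mid_stream_fallback(client: httpx.AsyncClient) -> bool:
    resp = await client.post(
        "http://localhost:4000/v1/chat/completions",  # hypothetical proxy URL
        json={
            "model": "claude-3-5-sonnet",  # placeholder model name
            "messages": [{"role": "user", "content": "hi"}],
        },
    )
    # Assumption: the boolean set in the diff serializes as "True".
    return resp.headers.get("x-litellm-mid-stream-fallback") == "True"
```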