Resolve Anthropic Overloaded error during stream

zishaansunderji 2025-04-07 18:04:50 -04:00
parent 3a7d729d88
commit 9717d89e69
7 changed files with 323 additions and 6 deletions


@@ -119,6 +119,41 @@ async def run_async_fallback(
    error_from_fallbacks = original_exception

    # Handle mid-stream fallbacks by preserving already generated content
    is_mid_stream = kwargs.pop("is_mid_stream_fallback", False)
    previous_content = kwargs.pop("previous_content", "")

    # If this is a mid-stream fallback and we have previous content, prepare messages
    if is_mid_stream and previous_content and "messages" in kwargs:
        messages = kwargs.get("messages", [])
        if isinstance(messages, list) and len(messages) > 0:
            if previous_content.strip():
                # Check for an existing system message
                system_msg_idx = None
                for i, msg in enumerate(messages):
                    if msg.get("role") == "system":
                        system_msg_idx = i
                        break

                continuation_text = f"The following is the beginning of an assistant's response. Continue from where it left off: '{previous_content}'"

                if system_msg_idx is not None:
                    # Append to the existing system message
                    messages[system_msg_idx]["content"] = (
                        messages[system_msg_idx].get("content", "")
                        + "\n\n"
                        + continuation_text
                    )
                else:
                    # Add a new system message
                    messages.insert(0, {"role": "system", "content": continuation_text})

                # Update kwargs with the modified messages
                kwargs["messages"] = messages

        # Track in metadata that this was a mid-stream fallback
        kwargs.setdefault("metadata", {}).update({
            "is_mid_stream_fallback": True,
            "fallback_depth": fallback_depth,
            "previous_content_length": len(previous_content),
        })

    for mg in fallback_model_group:
        if mg == original_model_group:
            continue
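
The effect of the message preparation above is easiest to see on a concrete request. Below is a minimal standalone sketch of the same branch logic (illustrative only, not part of the diff; the sample messages are invented):

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize the plot of Hamlet."},
]
previous_content = "Hamlet, Prince of Denmark, learns that his"

continuation_text = f"The following is the beginning of an assistant's response. Continue from where it left off: '{previous_content}'"

# Mirror the diff: extend an existing system message if there is one,
# otherwise insert a new system message at the front.
system_msg_idx = next(
    (i for i, msg in enumerate(messages) if msg.get("role") == "system"), None
)
if system_msg_idx is not None:
    messages[system_msg_idx]["content"] += "\n\n" + continuation_text
else:
    messages.insert(0, {"role": "system", "content": continuation_text})

# The fallback model now receives the partial output and is asked to continue it.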
@@ -139,6 +174,10 @@ async def run_async_fallback(
            response = await litellm_router.async_function_with_fallbacks(
                *args, **kwargs
            )

            if hasattr(response, "_hidden_params"):
                response._hidden_params.setdefault("metadata", {})["mid_stream_fallback"] = True
                # Also add to additional_headers for header propagation
                response._hidden_params.setdefault("additional_headers", {})[
                    "x-litellm-mid-stream-fallback"
                ] = True

            verbose_router_logger.info("Successful fallback b/w models.")
            response = add_fallback_headers_to_response(
                response=response,
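
For context, a hedged caller-side sketch of how the markers written into _hidden_params above could be checked once the fallback response arrives (was_mid_stream_fallback is a hypothetical helper, not part of this commit):

def was_mid_stream_fallback(response) -> bool:
    # _hidden_params may be absent on some response types, hence getattr.
    hidden = getattr(response, "_hidden_params", {}) or {}
    if hidden.get("metadata", {}).get("mid_stream_fallback"):
        return True
    # The same flag is mirrored into additional_headers so it can surface
    # to HTTP clients as the x-litellm-mid-stream-fallback header.
    return bool(hidden.get("additional_headers", {}).get("x-litellm-mid-stream-fallback"))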