feat(anthropic_adapter.py): support streaming requests for /v1/messages endpoint

Fixes https://github.com/BerriAI/litellm/issues/5011
2025-04-27 11:43:54 +00:00 · 2024-08-03 20:16:19 -07:00 · 2024-08-03 20:16:19 -07:00 · ac6c39c283
commit ac6c39c283
parent 39a98a2882
9 changed files with 425 additions and 35 deletions
--- a/litellm/adapters/anthropic_adapter.py
+++ b/litellm/adapters/anthropic_adapter.py
@ -4,7 +4,7 @@ import json
 import os
 import traceback
 import uuid
-from typing import Literal, Optional
+from typing import Any, Literal, Optional

 import dotenv
 import httpx
@ -13,7 +13,12 @@ from pydantic import BaseModel
 import litellm
 from litellm import ChatCompletionRequest, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
+from litellm.types.llms.anthropic import (
+    AnthropicMessagesRequest,
+    AnthropicResponse,
+    ContentBlockDelta,
+)
+from litellm.types.utils import AdapterCompletionStreamWrapper


 class AnthropicAdapter(CustomLogger):
@ -43,8 +48,147 @@ class AnthropicAdapter(CustomLogger):
            response=response
        )

-    def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
-        return super().translate_completion_output_params_streaming()
+    def translate_completion_output_params_streaming(
+        self, completion_stream: Any
+    ) -> Optional[AdapterCompletionStreamWrapper]:
+        """Wrap a completion stream so it yields Anthropic-style streaming events.
+
+        Args:
+            completion_stream: The stream to wrap. It is passed through
+                unchanged to ``AnthropicStreamWrapper``.
+
+        Returns:
+            An ``AnthropicStreamWrapper`` built around ``completion_stream``.
+        """
+        # ``Optional[...]`` (already imported and used in this file) is used
+        # instead of ``X | None`` so the annotation does not have to be
+        # evaluated as a PEP 604 union when the function is defined.
+        return AnthropicStreamWrapper(completion_stream=completion_stream)


 anthropic_adapter = AnthropicAdapter()
+
+
+class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
+    """
+    Re-emits the chunks of a completion stream as Anthropic-style streaming events.
+
+    - the first chunk returned is 'message_start'
+    - the content block must be started and stopped
+    - finish_reason must map exactly to an Anthropic stop reason, else the Anthropic client won't be able to parse it.
+    """
+
+    # True once the 'message_start' event has been returned.
+    sent_first_chunk: bool = False
+    # True once the 'content_block_start' event has been returned.
+    sent_content_block_start: bool = False
+    # True once 'content_block_stop' has been returned (on the first 'message_delta').
+    sent_content_block_finish: bool = False
+    # True once the closing 'message_stop' event has been returned.
+    sent_last_message: bool = False
+    # Translated chunk held back for one step; set when 'content_block_stop'
+    # is returned in place of the first 'message_delta'.
+    holding_chunk: Optional[Any] = None
+
+    def __next__(self):
+        """Return the next Anthropic-style streaming event (synchronous iteration).
+
+        Events are produced in this order:
+
+        1. a 'message_start' event,
+        2. a 'content_block_start' event,
+        3. one translated event per chunk of ``self.completion_stream``; when
+           the first 'message_delta' is seen, a 'content_block_stop' event is
+           returned in its place and the 'message_delta' is held back for a
+           later call,
+        4. any chunk still held back once the stream is exhausted,
+        5. a 'message_stop' event.
+
+        Raises:
+            StopIteration: once 'message_stop' has been returned.
+
+        Any other exception (including the one raised for a ``None`` /
+        ``"None"`` chunk) is logged through ``verbose_logger.error`` and not
+        re-raised, in which case this call returns ``None``.
+        """
+        try:
+            if self.sent_first_chunk is False:
+                self.sent_first_chunk = True
+                # NOTE: id, model and usage below are hard-coded literals; they
+                # are not read from the wrapped stream.
+                return {
+                    "type": "message_start",
+                    "message": {
+                        "id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [],
+                        "model": "claude-3-5-sonnet-20240620",
+                        "stop_reason": None,
+                        "stop_sequence": None,
+                        "usage": {"input_tokens": 25, "output_tokens": 1},
+                    },
+                }
+            if self.sent_content_block_start is False:
+                self.sent_content_block_start = True
+                return {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "text", "text": ""},
+                }
+
+            for chunk in self.completion_stream:
+                if chunk == "None" or chunk is None:
+                    raise Exception
+
+                processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+                    response=chunk
+                )
+                if (
+                    processed_chunk["type"] == "message_delta"
+                    and self.sent_content_block_finish is False
+                ):
+                    # Close the content block first; the 'message_delta' itself
+                    # is held and returned by a later call.
+                    self.holding_chunk = processed_chunk
+                    self.sent_content_block_finish = True
+                    return {
+                        "type": "content_block_stop",
+                        "index": 0,
+                    }
+                elif self.holding_chunk is not None:
+                    # Return the previously held chunk and hold the new one.
+                    return_chunk = self.holding_chunk
+                    self.holding_chunk = processed_chunk
+                    return return_chunk
+                else:
+                    return processed_chunk
+
+            # Stream exhausted: flush the held chunk before closing the
+            # message, so the final 'message_delta' is not dropped. This
+            # mirrors the async iteration path.
+            if self.holding_chunk is not None:
+                return_chunk = self.holding_chunk
+                self.holding_chunk = None
+                return return_chunk
+            if self.sent_last_message is False:
+                self.sent_last_message = True
+                return {"type": "message_stop"}
+            raise StopIteration
+        except StopIteration:
+            if self.sent_last_message is False:
+                self.sent_last_message = True
+                return {"type": "message_stop"}
+            raise StopIteration
+        except Exception as e:
+            # Errors are logged and not re-raised; the call then falls through
+            # and returns None.
+            verbose_logger.error(
+                "Anthropic Adapter - {}\n{}".format(e, traceback.format_exc())
+            )
+
+    async def __anext__(self):
+        """Return the next Anthropic-style streaming event (asynchronous iteration).
+
+        Order of events: 'message_start', 'content_block_start', then one
+        translated event per chunk of ``self.completion_stream``. On the first
+        'message_delta' a 'content_block_stop' is returned instead and the
+        'message_delta' is held back; held chunks are returned by later calls
+        and flushed once the stream is exhausted. The last event is
+        'message_stop'.
+
+        Raises:
+            StopAsyncIteration: once 'message_stop' has been returned.
+            Exception: when a chunk is ``None`` or the string ``"None"``.
+        """
+        try:
+            if self.sent_first_chunk is False:
+                self.sent_first_chunk = True
+                # Opening payload; id, model and usage are fixed literals.
+                opening_message = {
+                    "id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [],
+                    "model": "claude-3-5-sonnet-20240620",
+                    "stop_reason": None,
+                    "stop_sequence": None,
+                    "usage": {"input_tokens": 25, "output_tokens": 1},
+                }
+                return {"type": "message_start", "message": opening_message}
+            if self.sent_content_block_start is False:
+                self.sent_content_block_start = True
+                empty_text_block = {"type": "text", "text": ""}
+                return {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": empty_text_block,
+                }
+            async for raw_chunk in self.completion_stream:
+                if raw_chunk == "None" or raw_chunk is None:
+                    raise Exception
+                translated = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+                    response=raw_chunk
+                )
+                is_first_message_delta = (
+                    translated["type"] == "message_delta"
+                    and self.sent_content_block_finish is False
+                )
+                if is_first_message_delta:
+                    # Close the content block now; hand out the delta later.
+                    self.holding_chunk = translated
+                    self.sent_content_block_finish = True
+                    return {"type": "content_block_stop", "index": 0}
+                if self.holding_chunk is None:
+                    return translated
+                # Return the held chunk and hold the new one in its place.
+                pending, self.holding_chunk = self.holding_chunk, translated
+                return pending
+            # Stream exhausted: flush whatever is still held back.
+            if self.holding_chunk is not None:
+                pending, self.holding_chunk = self.holding_chunk, None
+                return pending
+            if self.sent_last_message is False:
+                self.sent_last_message = True
+                return {"type": "message_stop"}
+            raise StopIteration
+        except StopIteration:
+            if self.sent_last_message is False:
+                self.sent_last_message = True
+                return {"type": "message_stop"}
+            raise StopAsyncIteration