mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
feat(anthropic_adapter.py): support streaming requests for /v1/messages
endpoint
Fixes https://github.com/BerriAI/litellm/issues/5011
This commit is contained in:
parent
39a98a2882
commit
ac6c39c283
9 changed files with 425 additions and 35 deletions
|
@ -4,7 +4,7 @@ import json
|
|||
import os
|
||||
import traceback
|
||||
import uuid
|
||||
from typing import Literal, Optional
|
||||
from typing import Any, Literal, Optional
|
||||
|
||||
import dotenv
|
||||
import httpx
|
||||
|
@ -13,7 +13,12 @@ from pydantic import BaseModel
|
|||
import litellm
|
||||
from litellm import ChatCompletionRequest, verbose_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
|
||||
from litellm.types.llms.anthropic import (
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicResponse,
|
||||
ContentBlockDelta,
|
||||
)
|
||||
from litellm.types.utils import AdapterCompletionStreamWrapper
|
||||
|
||||
|
||||
class AnthropicAdapter(CustomLogger):
|
||||
|
@ -43,8 +48,147 @@ class AnthropicAdapter(CustomLogger):
|
|||
response=response
|
||||
)
|
||||
|
||||
def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
|
||||
return super().translate_completion_output_params_streaming()
|
||||
def translate_completion_output_params_streaming(
|
||||
self, completion_stream: Any
|
||||
) -> AdapterCompletionStreamWrapper | None:
|
||||
return AnthropicStreamWrapper(completion_stream=completion_stream)
|
||||
|
||||
|
||||
anthropic_adapter = AnthropicAdapter()
|
||||
|
||||
|
||||
class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
|
||||
"""
|
||||
- first chunk return 'message_start'
|
||||
- content block must be started and stopped
|
||||
- finish_reason must map exactly to anthropic reason, else anthropic client won't be able to parse it.
|
||||
"""
|
||||
|
||||
sent_first_chunk: bool = False
|
||||
sent_content_block_start: bool = False
|
||||
sent_content_block_finish: bool = False
|
||||
sent_last_message: bool = False
|
||||
holding_chunk: Optional[Any] = None
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
if self.sent_first_chunk is False:
|
||||
self.sent_first_chunk = True
|
||||
return {
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-3-5-sonnet-20240620",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 25, "output_tokens": 1},
|
||||
},
|
||||
}
|
||||
if self.sent_content_block_start is False:
|
||||
self.sent_content_block_start = True
|
||||
return {
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
}
|
||||
|
||||
for chunk in self.completion_stream:
|
||||
if chunk == "None" or chunk is None:
|
||||
raise Exception
|
||||
|
||||
processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
|
||||
response=chunk
|
||||
)
|
||||
if (
|
||||
processed_chunk["type"] == "message_delta"
|
||||
and self.sent_content_block_finish is False
|
||||
):
|
||||
self.holding_chunk = processed_chunk
|
||||
self.sent_content_block_finish = True
|
||||
return {
|
||||
"type": "content_block_stop",
|
||||
"index": 0,
|
||||
}
|
||||
elif self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = processed_chunk
|
||||
return return_chunk
|
||||
else:
|
||||
return processed_chunk
|
||||
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except StopIteration:
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
"Anthropic Adapter - {}\n{}".format(e, traceback.format_exc())
|
||||
)
|
||||
|
||||
async def __anext__(self):
|
||||
try:
|
||||
if self.sent_first_chunk is False:
|
||||
self.sent_first_chunk = True
|
||||
return {
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-3-5-sonnet-20240620",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 25, "output_tokens": 1},
|
||||
},
|
||||
}
|
||||
if self.sent_content_block_start is False:
|
||||
self.sent_content_block_start = True
|
||||
return {
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
}
|
||||
async for chunk in self.completion_stream:
|
||||
if chunk == "None" or chunk is None:
|
||||
raise Exception
|
||||
processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
|
||||
response=chunk
|
||||
)
|
||||
if (
|
||||
processed_chunk["type"] == "message_delta"
|
||||
and self.sent_content_block_finish is False
|
||||
):
|
||||
self.holding_chunk = processed_chunk
|
||||
self.sent_content_block_finish = True
|
||||
return {
|
||||
"type": "content_block_stop",
|
||||
"index": 0,
|
||||
}
|
||||
elif self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = processed_chunk
|
||||
return return_chunk
|
||||
else:
|
||||
return processed_chunk
|
||||
if self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = None
|
||||
return return_chunk
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except StopIteration:
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopAsyncIteration
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue