fix(anthropic.py): support *real* anthropic tool calling + streaming

Parses each chunk and translates to openai format
This commit is contained in:
Krrish Dholakia 2024-07-03 19:48:35 -07:00
parent ed5fc3d1f9
commit 344268e053
4 changed files with 453 additions and 202 deletions

View file

@ -8003,6 +8003,11 @@ class CustomStreamWrapper:
return hold, curr_chunk
def handle_anthropic_text_chunk(self, chunk):
"""
For old anthropic models - claude-1, claude-2.
Claude-3 is handled from within Anthropic.py VIA ModelResponseIterator()
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
@ -8031,48 +8036,6 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
def handle_anthropic_chunk(self, chunk):
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
text = ""
is_finished = False
finish_reason = None
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
type_chunk = data_json.get("type", None)
if type_chunk == "content_block_delta":
"""
Anthropic content chunk
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
"""
text = data_json.get("delta", {}).get("text", "")
elif type_chunk == "message_delta":
"""
Anthropic
chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
"""
# TODO - get usage from this chunk, set in response
finish_reason = data_json.get("delta", {}).get("stop_reason", None)
is_finished = True
return {
"text": text,
"is_finished": is_finished,
"finish_reason": finish_reason,
}
elif "error" in str_line:
raise ValueError(f"Unable to parse response. Original response: {str_line}")
else:
return {
"text": text,
"is_finished": is_finished,
"finish_reason": finish_reason,
}
def handle_vertexai_anthropic_chunk(self, chunk):
"""
- MessageStartEvent(message=Message(id='msg_01LeRRgvX4gwkX3ryBVgtuYZ', content=[], model='claude-3-sonnet-20240229', role='assistant', stop_reason=None, stop_sequence=None, type='message', usage=Usage(input_tokens=8, output_tokens=1)), type='message_start'); custom_llm_provider: vertex_ai
@ -8823,10 +8786,30 @@ class CustomStreamWrapper:
# return this for all models
completion_obj = {"content": ""}
if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
response_obj = self.handle_anthropic_chunk(chunk)
from litellm.types.llms.bedrock import GenericStreamingChunk
if self.received_finish_reason is not None:
raise StopIteration
response_obj: GenericStreamingChunk = chunk
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
self.received_finish_reason = response_obj["finish_reason"]
if (
self.stream_options
and self.stream_options.get("include_usage", False) is True
and response_obj["usage"] is not None
):
self.sent_stream_usage = True
model_response.usage = litellm.Usage(
prompt_tokens=response_obj["usage"]["inputTokens"],
completion_tokens=response_obj["usage"]["outputTokens"],
total_tokens=response_obj["usage"]["totalTokens"],
)
if "tool_use" in response_obj and response_obj["tool_use"] is not None:
completion_obj["tool_calls"] = [response_obj["tool_use"]]
elif (
self.custom_llm_provider
and self.custom_llm_provider == "anthropic_text"