Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
Litellm dev 03 05 2025 p3 (#9023)
* fix(invoke_handler.py): fix converse streaming - return signature + ensure consistency with anthropic api response

* build(model_prices_and_context_window.json): fix anthropic api claude-3-7 max output tokens - with the beta header this is 128k

  Resolves https://github.com/BerriAI/litellm/issues/8964

* feat(handler.py): handle new anthropic 'thinking_delta' block on streaming

  Fixes https://github.com/BerriAI/litellm/issues/8825
This commit is contained in:
parent f6535ae6ad
commit 744e10b0f0

5 changed files with 26 additions and 11 deletions
litellm/llms/anthropic/chat/handler.py

@@ -474,7 +474,10 @@ class ModelResponseIterator:
         if len(self.content_blocks) == 0:
             return False
 
-        if self.content_blocks[0]["delta"]["type"] == "text_delta":
+        if (
+            self.content_blocks[0]["delta"]["type"] == "text_delta"
+            or self.content_blocks[0]["delta"]["type"] == "thinking_delta"
+        ):
             return False
 
         for block in self.content_blocks:
@@ -617,9 +620,11 @@ class ModelResponseIterator:
                         "index": self.tool_index,
                     }
             elif type_chunk == "content_block_stop":
+
                 ContentBlockStop(**chunk)  # type: ignore
                 # check if tool call content block
                 is_empty = self.check_empty_tool_call_args()
+
                 if is_empty:
                     tool_use = {
                         "id": None,
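For context, Anthropic's Messages API streams extended thinking as content_block_delta events whose delta type is "thinking_delta" rather than "text_delta"; before this patch such a stream fell through the text-only check. A minimal sketch of the two event shapes and the widened first-block check (the helper name looks_like_tool_call_stream is illustrative, not litellm's actual method):

    # Illustrative event shapes from Anthropic's streaming Messages API.
    text_delta_event = {
        "type": "content_block_delta",
        "index": 0,
        "delta": {"type": "text_delta", "text": "Why did the"},
    }
    thinking_delta_event = {
        "type": "content_block_delta",
        "index": 0,
        "delta": {"type": "thinking_delta", "thinking": "The user wants a joke..."},
    }


    def looks_like_tool_call_stream(content_blocks: list) -> bool:
        """Hypothetical stand-in for the patched litellm check: if the first
        streamed delta is plain text OR extended thinking, this is ordinary
        assistant output, not a tool call, so return False (as the diff
        above does)."""
        if len(content_blocks) == 0:
            return False
        if content_blocks[0]["delta"]["type"] in ("text_delta", "thinking_delta"):
            return False
        return True


    assert looks_like_tool_call_stream([text_delta_event]) is False
    assert looks_like_tool_call_stream([thinking_delta_event]) is False  # fixed by this patch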
litellm/llms/bedrock/chat/invoke_handler.py

@@ -1260,6 +1260,9 @@ class AWSEventStreamDecoder:
                 _thinking_block = ChatCompletionThinkingBlock(type="thinking")
                 if "text" in thinking_block:
                     _thinking_block["thinking"] = thinking_block["text"]
+                elif "signature" in thinking_block:
+                    _thinking_block["signature"] = thinking_block["signature"]
+                    _thinking_block["thinking"] = ""  # consistent with anthropic response
                 thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list
@@ -1322,6 +1325,12 @@ class AWSEventStreamDecoder:
                     thinking_blocks = self.translate_thinking_blocks(
                         delta_obj["reasoningContent"]
                     )
+                    if (
+                        thinking_blocks
+                        and len(thinking_blocks) > 0
+                        and reasoning_content is None
+                    ):
+                        reasoning_content = ""  # set to non-empty string to ensure consistency with Anthropic
                 elif (
                     "contentBlockIndex" in chunk_data
                 ):  # stop block, no 'start' or 'delta' object
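The translate_thinking_blocks change above maps Bedrock's converse reasoningContent deltas onto Anthropic-style thinking blocks: streamed reasoning text carries a "text" key, while the final chunk carries only a "signature", which Anthropic pairs with an empty thinking string. A self-contained sketch of that mapping, with ChatCompletionThinkingBlock approximated as a plain dict:

    from typing import Dict, List


    def translate_thinking_block(thinking_block: Dict) -> List[Dict]:
        """Sketch of the patched translation: a Bedrock reasoningContent
        delta has either streamed 'text' or, in the last chunk, only a
        'signature'; the signature case gets an empty thinking string for
        parity with Anthropic's own response shape."""
        blocks: List[Dict] = []
        _thinking_block = {"type": "thinking"}  # ChatCompletionThinkingBlock stand-in
        if "text" in thinking_block:
            _thinking_block["thinking"] = thinking_block["text"]
        elif "signature" in thinking_block:
            _thinking_block["signature"] = thinking_block["signature"]
            _thinking_block["thinking"] = ""  # consistent with anthropic response
        blocks.append(_thinking_block)
        return blocks


    # Final Bedrock chunk, signature only (value truncated for illustration):
    print(translate_thinking_block({"signature": "EqoBCkgIARAB..."}))
    # -> [{'type': 'thinking', 'signature': 'EqoBCkgIARAB...', 'thinking': ''}]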
model_prices_and_context_window.json

@@ -2913,9 +2913,9 @@
         "supports_tool_choice": true
     },
     "claude-3-7-sonnet-latest": {
-        "max_tokens": 8192,
+        "max_tokens": 128000,
         "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 128000,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "cache_creation_input_token_cost": 0.00000375,

@@ -2932,9 +2932,9 @@
         "supports_tool_choice": true
     },
     "claude-3-7-sonnet-20250219": {
-        "max_tokens": 8192,
+        "max_tokens": 128000,
         "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 128000,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "cache_creation_input_token_cost": 0.00000375,
litellm/model_prices_and_context_window_backup.json (the backup copy kept in sync with the main price map)

@@ -2913,9 +2913,9 @@
         "supports_tool_choice": true
     },
     "claude-3-7-sonnet-latest": {
-        "max_tokens": 8192,
+        "max_tokens": 128000,
         "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 128000,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "cache_creation_input_token_cost": 0.00000375,

@@ -2932,9 +2932,9 @@
         "supports_tool_choice": true
     },
     "claude-3-7-sonnet-20250219": {
-        "max_tokens": 8192,
+        "max_tokens": 128000,
         "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 128000,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
         "cache_creation_input_token_cost": 0.00000375,
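The bump from 8192 to 128000 output tokens reflects Anthropic's 128k-output beta for Claude 3.7 Sonnet, gated behind the anthropic-beta: output-128k-2025-02-19 request header. A sketch of opting in through litellm (assuming extra_headers is forwarded to the provider, which is how litellm normally passes custom headers):

    import litellm

    # Request more than the old 8192-token ceiling; the beta header asks
    # Anthropic to allow up to 128k output tokens on claude-3-7-sonnet.
    resp = litellm.completion(
        model="anthropic/claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": "Write a very long story."}],
        max_tokens=100000,
        extra_headers={"anthropic-beta": "output-128k-2025-02-19"},
    )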
tests (Anthropic thinking output, streaming)

@@ -1196,10 +1196,11 @@ def test_anthropic_thinking_output(model):
     [
         "anthropic/claude-3-7-sonnet-20250219",
         # "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
         # "bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
     ],
 )
 def test_anthropic_thinking_output_stream(model):
-    # litellm.set_verbose = True
+    litellm.set_verbose = True
     try:
+        # litellm._turn_on_debug()
         resp = litellm.completion(

@@ -1207,7 +1208,7 @@ def test_anthropic_thinking_output_stream(model):
         messages=[{"role": "user", "content": "Tell me a joke."}],
         stream=True,
         thinking={"type": "enabled", "budget_tokens": 1024},
-        timeout=5,
+        timeout=10,
     )
 
     reasoning_content_exists = False
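Putting the pieces together, the streaming test above checks that decoded thinking_delta chunks surface as reasoning_content on the OpenAI-style delta. A usage sketch built from the same test code:

    import litellm

    resp = litellm.completion(
        model="anthropic/claude-3-7-sonnet-20250219",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
        thinking={"type": "enabled", "budget_tokens": 1024},
        timeout=10,
    )

    reasoning_content_exists = False
    for chunk in resp:
        delta = chunk.choices[0].delta
        # with extended thinking enabled, reasoning arrives before the answer
        if getattr(delta, "reasoning_content", None):
            reasoning_content_exists = True

    assert reasoning_content_exists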