Merge pull request #9437 from BerriAI/litellm_dev_03_21_2025_p1

Consistent anthropic response_format streaming/non-streaming behaviour
Krish Dholakia, 2025-03-21 16:17:27 -07:00 (committed by GitHub)
commit c73f65da98
4 changed files with 112 additions and 5 deletions


@@ -387,7 +387,7 @@ class AnthropicConfig(BaseConfig):
             _input_schema["additionalProperties"] = True
             _input_schema["properties"] = {}
         else:
-            _input_schema["properties"] = {"values": json_schema}
+            _input_schema.update(cast(AnthropicInputSchema, json_schema))

         _tool = AnthropicMessagesTool(
             name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema
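
In plain terms, the change above stops nesting the user's JSON schema under a synthetic "values" property and instead merges the schema's keys straight into the tool's input_schema. Below is a minimal standalone sketch of that behaviour; build_input_schema is a hypothetical stand-in for the relevant part of _create_json_tool_call_for_response_format, not the actual litellm code.

# Minimal sketch of the new merge behaviour (hypothetical helper, not litellm code).
from typing import Any, Dict, Optional


def build_input_schema(json_schema: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    _input_schema: Dict[str, Any] = {"type": "object"}
    if json_schema is None:
        # No schema supplied: accept any object.
        _input_schema["additionalProperties"] = True
        _input_schema["properties"] = {}
    else:
        # New behaviour: merge the schema's keys (properties, required, ...)
        # directly, instead of the old {"properties": {"values": json_schema}}.
        _input_schema.update(json_schema)
    return _input_schema


print(build_input_schema({"properties": {"agent_doing": {"type": "string"}}}))
# -> {'type': 'object', 'properties': {'agent_doing': {'type': 'string'}}}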


@@ -0,0 +1,35 @@
+import json
+import os
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+from unittest.mock import MagicMock, patch
+
+from litellm.llms.anthropic.chat.transformation import AnthropicConfig
+
+
+def test_response_format_transformation_unit_test():
+    config = AnthropicConfig()
+
+    response_format_json_schema = {
+        "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. Example: "Learning about home automation"',
+        "properties": {"agent_doing": {"title": "Agent Doing", "type": "string"}},
+        "required": ["agent_doing"],
+        "title": "ThinkingStep",
+        "type": "object",
+        "additionalProperties": False,
+    }
+
+    result = config._create_json_tool_call_for_response_format(
+        json_schema=response_format_json_schema
+    )
+
+    assert result["input_schema"]["properties"] == {
+        "agent_doing": {"title": "Agent Doing", "type": "string"}
+    }
+    print(result)
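
Given the merge behaviour from the first hunk, the result printed above should look roughly like the sketch below. The "name" value is an assumption about litellm's RESPONSE_FORMAT_TOOL_NAME constant; the test itself only pins down "properties".

# Rough expected shape of `result` (tool name is an assumption, not asserted above).
expected_shape = {
    "name": "json_tool_call",  # assumed value of RESPONSE_FORMAT_TOOL_NAME
    "input_schema": {
        "type": "object",
        "title": "ThinkingStep",
        "description": "Progress report for the thinking process ...",
        "properties": {"agent_doing": {"title": "Agent Doing", "type": "string"}},
        "required": ["agent_doing"],
        "additionalProperties": False,  # merged through from the input schema
    },
}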


@@ -20,6 +20,7 @@ from litellm.utils import (
     get_optional_params,
     ProviderConfigManager,
 )
+from litellm.main import stream_chunk_builder
 from typing import Union

 # test_example.py

@@ -338,7 +339,7 @@ class BaseLLMChatTest(ABC):
     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_pydantic_obj(self):
-        litellm.set_verbose = True
+        litellm._turn_on_debug()
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema

@@ -995,3 +996,73 @@ class BaseOSeriesModelsTest(ABC):  # test across azure/openai
             ), "temperature should not be in the request body"
         except Exception as e:
             pytest.fail(f"Error occurred: {e}")
+
+
+class BaseAnthropicChatTest(ABC):
+    """
+    Ensures consistent result across anthropic model usage
+    """
+
+    @abstractmethod
+    def get_base_completion_call_args(self) -> dict:
+        """Must return the base completion call args"""
+        pass
+
+    @property
+    def completion_function(self):
+        return litellm.completion
+
+    def test_anthropic_response_format_streaming_vs_non_streaming(self):
+        litellm.set_verbose = True
+        args = {
+            "messages": [
+                {
+                    "content": "Your goal is to summarize the previous agent's thinking process into short descriptions to let user better understand the research progress. If no information is available, just say generic phrase like 'Doing some research...' with the given output format. Make sure to adhere to the output format no matter what, even if you don't have any information or you are not allowed to respond to the given input information (then just say generic phrase like 'Doing some research...').",
+                    "role": "system",
+                },
+                {
+                    "role": "user",
+                    "content": "Here is the input data (previous agent's output): \n\n Let's try to refine our search further, focusing more on the technical aspects of home automation and home energy system management:",
+                },
+            ],
+            "response_format": {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "final_output",
+                    "strict": True,
+                    "schema": {
+                        "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. Example: "Learning about home automation"',
+                        "properties": {
+                            "agent_doing": {"title": "Agent Doing", "type": "string"}
+                        },
+                        "required": ["agent_doing"],
+                        "title": "ThinkingStep",
+                        "type": "object",
+                        "additionalProperties": False,
+                    },
+                },
+            },
+        }
+
+        base_completion_call_args = self.get_base_completion_call_args()
+
+        response = self.completion_function(
+            **base_completion_call_args, **args, stream=True
+        )
+
+        chunks = []
+        for chunk in response:
+            print(f"chunk: {chunk}")
+            chunks.append(chunk)
+
+        print(f"chunks: {chunks}")
+        built_response = stream_chunk_builder(chunks=chunks)
+
+        non_stream_response = self.completion_function(
+            **base_completion_call_args, **args, stream=False
+        )
+
+        assert (
+            json.loads(built_response.choices[0].message.content).keys()
+            == json.loads(non_stream_response.choices[0].message.content).keys()
+        ), f"Got={json.loads(built_response.choices[0].message.content)}, Expected={json.loads(non_stream_response.choices[0].message.content)}"


@@ -36,7 +36,7 @@ from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
 from litellm.llms.anthropic.common_utils import process_anthropic_headers
 from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
 from httpx import Headers
-from base_llm_unit_tests import BaseLLMChatTest
+from base_llm_unit_tests import BaseLLMChatTest, BaseAnthropicChatTest


 def streaming_format_tests(chunk: dict, idx: int):

@@ -455,14 +455,15 @@ def test_create_json_tool_call_for_response_format():
     _input_schema = tool.get("input_schema")
     assert _input_schema is not None
     assert _input_schema.get("type") == "object"
-    assert _input_schema.get("properties") == {"values": custom_schema}
+    assert _input_schema.get("name") == custom_schema["name"]
+    assert _input_schema.get("age") == custom_schema["age"]
     assert "additionalProperties" not in _input_schema


 from litellm import completion


-class TestAnthropicCompletion(BaseLLMChatTest):
+class TestAnthropicCompletion(BaseLLMChatTest, BaseAnthropicChatTest):
     def get_base_completion_call_args(self) -> dict:
         return {"model": "anthropic/claude-3-5-sonnet-20240620"}