diff --git a/litellm/responses/litellm_completion_transformation/transformation.py b/litellm/responses/litellm_completion_transformation/transformation.py
index fc50b47828..23b49c15ad 100644
--- a/litellm/responses/litellm_completion_transformation/transformation.py
+++ b/litellm/responses/litellm_completion_transformation/transformation.py
@@ -9,10 +9,12 @@ from litellm.types.llms.openai import (
     ChatCompletionSystemMessage,
     ChatCompletionUserMessage,
     GenericChatCompletionMessage,
+    Reasoning,
     ResponseAPIUsage,
     ResponseInputParam,
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIResponse,
+    ResponseTextConfig,
 )
 from litellm.types.responses.main import GenericResponseOutputItem, OutputText
 from litellm.types.utils import Choices, Message, ModelResponse, Usage
@@ -31,7 +33,7 @@ class LiteLLMCompletionResponsesConfig:
         """
         Transform a Responses API request into a Chat Completion request
         """
-        return {
+        litellm_completion_request: dict = {
             "messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
                 input=input,
                 responses_api_request=responses_api_request,
@@ -45,10 +47,17 @@ class LiteLLMCompletionResponsesConfig:
             "parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
             "max_tokens": responses_api_request.get("max_output_tokens"),
             "stream": kwargs.get("stream", False),
-            "metadata": kwargs.get("metadata", {}),
-            "service_tier": kwargs.get("service_tier", ""),
+            "metadata": kwargs.get("metadata"),
+            "service_tier": kwargs.get("service_tier"),
         }
 
+        # only pass non-None values
+        litellm_completion_request = {
+            k: v for k, v in litellm_completion_request.items() if v is not None
+        }
+
+        return litellm_completion_request
+
     @staticmethod
     def transform_responses_api_input_to_messages(
         input: Union[str, ResponseInputParam],
@@ -148,7 +157,7 @@ class LiteLLMCompletionResponsesConfig:
                 chat_completion_response, "incomplete_details", None
             ),
             instructions=getattr(chat_completion_response, "instructions", None),
-            metadata=getattr(chat_completion_response, "metadata", None),
+            metadata=getattr(chat_completion_response, "metadata", {}),
             output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
                 chat_completion_response=chat_completion_response,
                 choices=getattr(chat_completion_response, "choices", []),
@@ -156,7 +165,7 @@ class LiteLLMCompletionResponsesConfig:
             ),
             parallel_tool_calls=getattr(
                 chat_completion_response, "parallel_tool_calls", False
             ),
-            temperature=getattr(chat_completion_response, "temperature", None),
+            temperature=getattr(chat_completion_response, "temperature", 0),
             tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
             tools=getattr(chat_completion_response, "tools", []),
             top_p=getattr(chat_completion_response, "top_p", None),
@@ -166,11 +175,13 @@ class LiteLLMCompletionResponsesConfig:
             previous_response_id=getattr(
                 chat_completion_response, "previous_response_id", None
             ),
-            reasoning=getattr(chat_completion_response, "reasoning", None),
-            status=getattr(chat_completion_response, "status", None),
-            text=getattr(chat_completion_response, "text", None),
+            reasoning=Reasoning(),
+            status=getattr(chat_completion_response, "status", "completed"),
+            text=ResponseTextConfig(),
             truncation=getattr(chat_completion_response, "truncation", None),
-            usage=getattr(chat_completion_response, "usage", None),
+            usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+                chat_completion_response=chat_completion_response
+            ),
             user=getattr(chat_completion_response, "user", None),
         )
 
@@ -206,8 +217,15 @@ class LiteLLMCompletionResponsesConfig:
 
     @staticmethod
     def _transform_chat_completion_usage_to_responses_usage(
-        usage: Usage,
+        chat_completion_response: ModelResponse,
     ) -> ResponseAPIUsage:
+        usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
+        if usage is None:
+            return ResponseAPIUsage(
+                input_tokens=0,
+                output_tokens=0,
+                total_tokens=0,
+            )
         return ResponseAPIUsage(
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
diff --git a/litellm/responses/main.py b/litellm/responses/main.py
index 70b651f376..a6ee92ffdb 100644
--- a/litellm/responses/main.py
+++ b/litellm/responses/main.py
@@ -10,6 +10,9 @@ from litellm.constants import request_timeout
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.litellm_completion_transformation.handler import (
+    LiteLLMCompletionTransformationHandler,
+)
 from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import (
     Reasoning,
@@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
 
 ####### ENVIRONMENT VARIABLES ###################
 # Initialize any necessary instances or variables here
 base_llm_http_handler = BaseLLMHTTPHandler()
+litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
 #################################################
@@ -178,19 +182,12 @@ def responses(
     )
 
     # get provider config
-    responses_api_provider_config: Optional[
-        BaseResponsesAPIConfig
-    ] = ProviderConfigManager.get_provider_responses_api_config(
-        model=model,
-        provider=litellm.LlmProviders(custom_llm_provider),
-    )
-
-    if responses_api_provider_config is None:
-        raise litellm.BadRequestError(
+    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+        ProviderConfigManager.get_provider_responses_api_config(
             model=model,
-            llm_provider=custom_llm_provider,
-            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
+            provider=litellm.LlmProviders(custom_llm_provider),
         )
+    )
 
     local_vars.update(kwargs)
     # Get ResponsesAPIOptionalRequestParams with only valid parameters
@@ -200,6 +197,16 @@ def responses(
         )
     )
 
+    if responses_api_provider_config is None:
+        return litellm_completion_transformation_handler.response_api_handler(
+            model=model,
+            input=input,
+            responses_api_request=ResponsesAPIOptionalRequestParams(),
+            custom_llm_provider=custom_llm_provider,
+            _is_async=_is_async,
+            **kwargs,
+        )
+
     # Get optional parameters for the responses API
     responses_api_request_params: Dict = (
         ResponsesAPIRequestUtils.get_optional_params_responses_api(
diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py
index 356fe5e78e..884d9bda7b 100644
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@@ -68,16 +68,16 @@ def validate_responses_api_response(response, final_chunk: bool = False):
         "metadata": dict,
         "model": str,
         "object": str,
-        "temperature": (int, float),
+        "temperature": (int, float, type(None)),
         "tool_choice": (dict, str),
         "tools": list,
-        "top_p": (int, float),
+        "top_p": (int, float, type(None)),
         "max_output_tokens": (int, type(None)),
         "previous_response_id": (str, type(None)),
         "reasoning": dict,
         "status": str,
         "text": ResponseTextConfig,
-        "truncation": str,
+        "truncation": (str, type(None)),
         "usage": ResponseAPIUsage,
         "user": (str, type(None)),
     }
diff --git a/tests/llm_responses_api_testing/test_anthropic_responses_api.py b/tests/llm_responses_api_testing/test_anthropic_responses_api.py
new file mode 100644
index 0000000000..543d228bde
--- /dev/null
+++ b/tests/llm_responses_api_testing/test_anthropic_responses_api.py
@@ -0,0 +1,29 @@
+import os
+import sys
+import pytest
+import asyncio
+from typing import Optional
+from unittest.mock import patch, AsyncMock
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
+import json
+from litellm.types.utils import StandardLoggingPayload
+from litellm.types.llms.openai import (
+    ResponseCompletedEvent,
+    ResponsesAPIResponse,
+    ResponseTextConfig,
+    ResponseAPIUsage,
+    IncompleteDetails,
+)
+import litellm
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from base_responses_api import BaseResponsesAPITest
+
+class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
+    def get_base_completion_call_args(self):
+        #litellm._turn_on_debug()
+        return {
+            "model": "anthropic/claude-3-5-sonnet-latest",
+        }
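
With this patch, a provider that has no native Responses API config falls through to the completion-transformation bridge instead of raising `BadRequestError`. A minimal sketch of the new path (assumes a valid `ANTHROPIC_API_KEY` is set; printed values are illustrative):

```python
import litellm

# Anthropic has no provider-specific Responses API config, so this call
# is transformed into a chat completion request, and the resulting
# ModelResponse is mapped back into a ResponsesAPIResponse by the new
# LiteLLMCompletionTransformationHandler.
response = litellm.responses(
    model="anthropic/claude-3-5-sonnet-latest",
    input="What is the capital of France?",
    max_output_tokens=100,
)

# Fields with no chat-completion equivalent now use the defaults added
# above: reasoning=Reasoning(), text=ResponseTextConfig(),
# status="completed", and a zeroed ResponseAPIUsage when the underlying
# response carries no usage object.
print(response.status)
print(response.usage.total_tokens)
```

This is also why the test validator now accepts `None` for `temperature`, `top_p`, and `truncation`: the bridged response only echoes back what the chat completion path actually returns.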