Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)

Commit e8b9b4f68b ("fixes to test"), parent 4e2b04a1e0.
4 changed files with 78 additions and 24 deletions.
@@ -9,10 +9,12 @@ from litellm.types.llms.openai import (
     ChatCompletionSystemMessage,
     ChatCompletionUserMessage,
     GenericChatCompletionMessage,
+    Reasoning,
     ResponseAPIUsage,
     ResponseInputParam,
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIResponse,
+    ResponseTextConfig,
 )
 from litellm.types.responses.main import GenericResponseOutputItem, OutputText
 from litellm.types.utils import Choices, Message, ModelResponse, Usage
@@ -31,7 +33,7 @@ class LiteLLMCompletionResponsesConfig:
         """
         Transform a Responses API request into a Chat Completion request
         """
-        return {
+        litellm_completion_request: dict = {
             "messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
                 input=input,
                 responses_api_request=responses_api_request,
@@ -45,10 +47,17 @@ class LiteLLMCompletionResponsesConfig:
             "parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
             "max_tokens": responses_api_request.get("max_output_tokens"),
             "stream": kwargs.get("stream", False),
-            "metadata": kwargs.get("metadata", {}),
-            "service_tier": kwargs.get("service_tier", ""),
+            "metadata": kwargs.get("metadata"),
+            "service_tier": kwargs.get("service_tier"),
         }
 
+        # only pass non-None values
+        litellm_completion_request = {
+            k: v for k, v in litellm_completion_request.items() if v is not None
+        }
+
+        return litellm_completion_request
+
     @staticmethod
     def transform_responses_api_input_to_messages(
         input: Union[str, ResponseInputParam],
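Note on the hunk above: because every optional field is now fetched with a bare kwargs.get(...), unset fields arrive as None and the final dict comprehension drops them, so provider defaults apply instead of empty strings or empty dicts. A standalone sketch of that filtering pattern, with illustrative names that are not part of the litellm module:

def build_request(**caller_kwargs):
    # assemble the request with None placeholders for anything the caller did not set
    request = {
        "stream": caller_kwargs.get("stream", False),
        "metadata": caller_kwargs.get("metadata"),
        "service_tier": caller_kwargs.get("service_tier"),
    }
    # drop None-valued keys so they are simply omitted from the outgoing call
    return {k: v for k, v in request.items() if v is not None}

print(build_request(metadata={"run_id": "abc"}))  # {'stream': False, 'metadata': {'run_id': 'abc'}}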
@@ -148,7 +157,7 @@ class LiteLLMCompletionResponsesConfig:
                 chat_completion_response, "incomplete_details", None
             ),
             instructions=getattr(chat_completion_response, "instructions", None),
-            metadata=getattr(chat_completion_response, "metadata", None),
+            metadata=getattr(chat_completion_response, "metadata", {}),
             output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
                 chat_completion_response=chat_completion_response,
                 choices=getattr(chat_completion_response, "choices", []),
@@ -156,7 +165,7 @@ class LiteLLMCompletionResponsesConfig:
             parallel_tool_calls=getattr(
                 chat_completion_response, "parallel_tool_calls", False
             ),
-            temperature=getattr(chat_completion_response, "temperature", None),
+            temperature=getattr(chat_completion_response, "temperature", 0),
             tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
             tools=getattr(chat_completion_response, "tools", []),
             top_p=getattr(chat_completion_response, "top_p", None),
@@ -166,11 +175,13 @@ class LiteLLMCompletionResponsesConfig:
             previous_response_id=getattr(
                 chat_completion_response, "previous_response_id", None
             ),
-            reasoning=getattr(chat_completion_response, "reasoning", None),
-            status=getattr(chat_completion_response, "status", None),
-            text=getattr(chat_completion_response, "text", None),
+            reasoning=Reasoning(),
+            status=getattr(chat_completion_response, "status", "completed"),
+            text=ResponseTextConfig(),
             truncation=getattr(chat_completion_response, "truncation", None),
-            usage=getattr(chat_completion_response, "usage", None),
+            usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+                chat_completion_response=chat_completion_response
+            ),
             user=getattr(chat_completion_response, "user", None),
         )
 
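The pattern in this hunk is defensive field extraction: every Responses API field is pulled off the chat-completion ModelResponse with getattr and a safe default, and fields the chat API does not carry at all (reasoning, text config) are filled with empty typed objects. A tiny generic sketch of the getattr-with-default idea, where SimpleNamespace stands in for the real ModelResponse:

from types import SimpleNamespace

# stand-in for a chat-completion response object that lacks some attributes
chat_response = SimpleNamespace(model="gpt-test")

fields = {
    "model": getattr(chat_response, "model", None),
    "temperature": getattr(chat_response, "temperature", 0),   # 0 only when the attribute is missing
    "status": getattr(chat_response, "status", "completed"),   # chat responses have no status field
}
print(fields)  # {'model': 'gpt-test', 'temperature': 0, 'status': 'completed'}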
@@ -206,8 +217,15 @@ class LiteLLMCompletionResponsesConfig:
 
     @staticmethod
     def _transform_chat_completion_usage_to_responses_usage(
-        usage: Usage,
+        chat_completion_response: ModelResponse,
     ) -> ResponseAPIUsage:
+        usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
+        if usage is None:
+            return ResponseAPIUsage(
+                input_tokens=0,
+                output_tokens=0,
+                total_tokens=0,
+            )
         return ResponseAPIUsage(
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
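For context, the usage conversion introduced above maps chat-completion token counts onto the Responses API fields (prompt_tokens to input_tokens, completion_tokens to output_tokens) and falls back to zeros when the provider reported no usage block. A hedged, self-contained sketch of that mapping, using plain dicts instead of litellm's Usage and ResponseAPIUsage types:

from typing import Optional

def usage_to_responses_usage(usage: Optional[dict]) -> dict:
    # sketch only: plain dicts stand in for the typed usage objects
    if usage is None:
        # provider reported no usage: return an all-zero block
        return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
    return {
        "input_tokens": usage["prompt_tokens"],
        "output_tokens": usage["completion_tokens"],
        "total_tokens": usage.get(
            "total_tokens", usage["prompt_tokens"] + usage["completion_tokens"]
        ),
    }

print(usage_to_responses_usage({"prompt_tokens": 12, "completion_tokens": 30, "total_tokens": 42}))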
@@ -10,6 +10,9 @@ from litellm.constants import request_timeout
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.litellm_completion_transformation.handler import (
+    LiteLLMCompletionTransformationHandler,
+)
 from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import (
     Reasoning,
@@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
 ####### ENVIRONMENT VARIABLES ###################
 # Initialize any necessary instances or variables here
 base_llm_http_handler = BaseLLMHTTPHandler()
+litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
 #################################################
 
 
@@ -178,19 +182,12 @@ def responses(
     )
 
     # get provider config
-    responses_api_provider_config: Optional[
-        BaseResponsesAPIConfig
-    ] = ProviderConfigManager.get_provider_responses_api_config(
-        model=model,
-        provider=litellm.LlmProviders(custom_llm_provider),
-    )
-
-    if responses_api_provider_config is None:
-        raise litellm.BadRequestError(
-            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
-            llm_provider=custom_llm_provider,
-        )
-
+    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+        ProviderConfigManager.get_provider_responses_api_config(
+            model=model,
+            provider=litellm.LlmProviders(custom_llm_provider),
+        )
+    )
 
     local_vars.update(kwargs)
     # Get ResponsesAPIOptionalRequestParams with only valid parameters
@@ -200,6 +197,16 @@ def responses(
         )
     )
 
+    if responses_api_provider_config is None:
+        return litellm_completion_transformation_handler.response_api_handler(
+            model=model,
+            input=input,
+            responses_api_request=ResponsesAPIOptionalRequestParams(),
+            custom_llm_provider=custom_llm_provider,
+            _is_async=_is_async,
+            **kwargs,
+        )
+
     # Get optional parameters for the responses API
     responses_api_request_params: Dict = (
         ResponsesAPIRequestUtils.get_optional_params_responses_api(
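The net effect of the two hunks above: a missing provider-specific Responses API config is no longer a BadRequestError; the request is instead bridged through the chat-completions code path. A simplified control-flow sketch of that routing decision, where the config lookup and both handlers below are stubs rather than litellm APIs:

# stubs standing in for the real provider-config lookup and handlers
def get_provider_config(provider):
    # pretend only "openai" has a native Responses API config
    return {"provider": provider} if provider == "openai" else None

def bridge_via_chat_completions(model, input, **kwargs):
    return {"routed": "chat-completions bridge", "model": model}

def call_native_responses_api(config, model, input, **kwargs):
    return {"routed": "native responses api", "model": model}

def responses(model, input, provider, **kwargs):
    provider_config = get_provider_config(provider)
    if provider_config is None:
        # previously this path raised an error; now it falls back to the bridge
        return bridge_via_chat_completions(model, input, **kwargs)
    return call_native_responses_api(provider_config, model, input, **kwargs)

print(responses("claude-3-5-sonnet-latest", "hi", provider="anthropic")["routed"])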
@@ -68,16 +68,16 @@ def validate_responses_api_response(response, final_chunk: bool = False):
         "metadata": dict,
         "model": str,
         "object": str,
-        "temperature": (int, float),
+        "temperature": (int, float, type(None)),
         "tool_choice": (dict, str),
         "tools": list,
-        "top_p": (int, float),
+        "top_p": (int, float, type(None)),
         "max_output_tokens": (int, type(None)),
         "previous_response_id": (str, type(None)),
         "reasoning": dict,
         "status": str,
         "text": ResponseTextConfig,
-        "truncation": str,
+        "truncation": (str, type(None)),
         "usage": ResponseAPIUsage,
         "user": (str, type(None)),
     }
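The widened type map above lets optional response fields come back as None, which matters for providers bridged through the chat-completions path. Validation against such a map is a plain isinstance loop; a minimal sketch under that assumption (field list trimmed, helper name hypothetical, not the real test utility):

expected_types = {
    "temperature": (int, float, type(None)),
    "top_p": (int, float, type(None)),
    "truncation": (str, type(None)),
    "user": (str, type(None)),
}

def validate(response: dict) -> None:
    # each field must be one of the allowed types; absent fields read as None
    for field, allowed in expected_types.items():
        assert isinstance(response.get(field), allowed), f"{field} has unexpected type"

validate({"temperature": None, "top_p": 0.9, "truncation": None, "user": None})  # passes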
@@ -0,0 +1,29 @@
+import os
+import sys
+import pytest
+import asyncio
+from typing import Optional
+from unittest.mock import patch, AsyncMock
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
+import json
+from litellm.types.utils import StandardLoggingPayload
+from litellm.types.llms.openai import (
+    ResponseCompletedEvent,
+    ResponsesAPIResponse,
+    ResponseTextConfig,
+    ResponseAPIUsage,
+    IncompleteDetails,
+)
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from base_responses_api import BaseResponsesAPITest
+
+
+class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
+    def get_base_completion_call_args(self):
+        # litellm._turn_on_debug()
+        return {
+            "model": "anthropic/claude-3-5-sonnet-latest",
+        }
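If the shared BaseResponsesAPITest follows the usual pattern, the subclass above only supplies provider-specific call arguments and the base class drives litellm.responses with them. A hedged sketch of what such a call would look like outside the test harness (assumes a valid ANTHROPIC_API_KEY and that the base test forwards these args unchanged):

import litellm

call_args = {"model": "anthropic/claude-3-5-sonnet-latest"}

# assumption: the base test class passes these args straight into litellm.responses
response = litellm.responses(
    input="Tell me a three sentence bedtime story about a unicorn.",
    **call_args,
)
print(response.output)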