fixes for deleting responses, response API

Ishaan Jaff 2025-04-22 12:54:05 -07:00
parent 63bde3dc73
commit edebe69ac0
9 changed files with 254 additions and 40 deletions

View file

@@ -1259,10 +1259,10 @@ class BaseLLMHTTPHandler:
     def delete_response_api_handler(
         self,
         response_id: str,
-        custom_llm_provider: str,
         responses_api_provider_config: BaseResponsesAPIConfig,
         litellm_params: GenericLiteLLMParams,
         logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str],
         extra_headers: Optional[Dict[str, Any]] = None,
         extra_body: Optional[Dict[str, Any]] = None,
         timeout: Optional[Union[float, httpx.Timeout]] = None,
@@ -1313,7 +1313,7 @@ class BaseLLMHTTPHandler:
         try:
             response = sync_httpx_client.delete(
-                url=api_base, headers=headers, data=json.dumps(data), timeout=timeout
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
             )
         except Exception as e:

View file

@@ -230,7 +230,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
         OpenAI API expects the following request
         - DELETE /v1/responses/{response_id}
         """
-        url = f"{api_base}/responses/{response_id}"
+        url = f"{api_base}/{response_id}"
         data = {}
         return url, data
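
The two fixes above are paired: the OpenAI transformation now appends only the response id (api_base already resolves to the provider's responses endpoint, so the old code produced a doubled .../responses/responses/{id} path), and the handler in the first hunk sends the DELETE to that transformed url instead of the bare api_base. A minimal standalone sketch of the resulting request, with illustrative values:

import json

import httpx

# Illustrative values, not taken from the diff.
api_base = "https://api.openai.com/v1/responses"
response_id = "resp_abc123"

# New transformation: append only the response id to api_base.
url = f"{api_base}/{response_id}"  # -> https://api.openai.com/v1/responses/resp_abc123

# The handler now issues the DELETE against `url` (previously it hit `api_base` directly).
resp = httpx.request(
    "DELETE",
    url,
    headers={"Authorization": "Bearer sk-..."},  # placeholder key
    content=json.dumps({}),
    timeout=30.0,
)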

View file

@@ -24,6 +24,7 @@ from litellm.types.llms.openai import (
     ToolChoice,
     ToolParam,
 )
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 from litellm.utils import ProviderConfigManager, client
@@ -122,6 +123,7 @@ async def aresponses(
         response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
             responses_api_response=response,
             kwargs=kwargs,
+            custom_llm_provider=custom_llm_provider,
         )
         return response
     except Exception as e:
@@ -260,6 +262,7 @@ def responses(
         response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
             responses_api_response=response,
             kwargs=kwargs,
+            custom_llm_provider=custom_llm_provider,
         )
         return response
@@ -271,3 +274,94 @@ def responses(
             completion_kwargs=local_vars,
             extra_kwargs=kwargs,
         )
+
+
+@client
+def delete_responses(
+    response_id: str,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM specific params,
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+) -> DeleteResponseResult:
+    """
+    Synchronous version of the DELETE Responses API
+
+    DELETE /v1/responses/{response_id} endpoint in the responses API
+    """
+    local_vars = locals()
+    try:
+        litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
+        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+        _is_async = kwargs.pop("aresponses", False) is True
+
+        # get llm provider logic
+        litellm_params = GenericLiteLLMParams(**kwargs)
+
+        # get custom llm provider from response_id
+        decoded_response_id: DecodedResponseId = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=response_id,
+            )
+        )
+        response_id = decoded_response_id.get("response_id") or response_id
+        custom_llm_provider = (
+            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
+        )
+
+        # get provider config
+        responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+            ProviderConfigManager.get_provider_responses_api_config(
+                model=None,
+                provider=litellm.LlmProviders(custom_llm_provider),
+            )
+        )
+
+        if responses_api_provider_config is None:
+            raise ValueError(
+                f"DELETE responses is not supported for {custom_llm_provider}"
+            )
+
+        local_vars.update(kwargs)
+
+        # Pre Call logging
+        litellm_logging_obj.update_environment_variables(
+            model=None,
+            optional_params={
+                "response_id": response_id,
+            },
+            litellm_params={
+                "litellm_call_id": litellm_call_id,
+            },
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        # Call the handler with _is_async flag instead of directly calling the async handler
+        response = base_llm_http_handler.delete_response_api_handler(
+            response_id=response_id,
+            custom_llm_provider=custom_llm_provider,
+            responses_api_provider_config=responses_api_provider_config,
+            litellm_params=litellm_params,
+            logging_obj=litellm_logging_obj,
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout or request_timeout,
+            _is_async=_is_async,
+            client=kwargs.get("client"),
+        )
+
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=None,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
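
A hedged usage sketch of the new synchronous helper (model name and prompt are illustrative; because the response id returned by litellm.responses() now embeds the provider and model id, the delete call only needs the id):

import litellm

response = litellm.responses(
    model="openai/gpt-4o-mini",  # illustrative model
    input="Basic ping",
    max_output_tokens=20,
)

# The composite id encodes custom_llm_provider, model_id, and the upstream response_id,
# so nothing else has to be passed here.
result = litellm.delete_responses(response_id=response.id)
print(result.deleted)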

View file

@@ -1,6 +1,8 @@
 import base64
 from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints

+from typing_extensions import TypedDict
+
 import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
@@ -9,6 +11,7 @@ from litellm.types.llms.openai import (
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIResponse,
 )
+from litellm.types.responses.main import DecodedResponseId
 from litellm.types.utils import SpecialEnums, Usage
@@ -84,29 +87,35 @@ class ResponsesAPIRequestUtils:
     def _update_responses_api_response_id_with_model_id(
         responses_api_response: ResponsesAPIResponse,
         kwargs: Dict[str, Any],
+        custom_llm_provider: Optional[str],
     ) -> ResponsesAPIResponse:
-        """Update the responses_api_response_id with the model_id"""
+        """
+        Update the responses_api_response_id with model_id and custom_llm_provider
+
+        This builds a composite ID containing the custom LLM provider, model ID, and original response ID
+        """
         litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
         model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
         model_id = model_info.get("id")
         updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
             model_id=model_id,
+            custom_llm_provider=custom_llm_provider,
             response_id=responses_api_response.id,
         )
         responses_api_response.id = updated_id
         return responses_api_response

     @staticmethod
     def _build_responses_api_response_id(
+        custom_llm_provider: Optional[str],
         model_id: Optional[str],
         response_id: str,
     ) -> str:
         """Build the responses_api_response_id"""
-        if model_id is None:
-            return response_id
         assembled_id: str = str(
             SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
-        ).format(model_id, response_id)
+        ).format(custom_llm_provider, model_id, response_id)
         base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
             "utf-8"
         )
@@ -115,12 +124,12 @@ class ResponsesAPIRequestUtils:
     @staticmethod
     def _decode_responses_api_response_id(
         response_id: str,
-    ) -> Tuple[Optional[str], str]:
+    ) -> DecodedResponseId:
         """
         Decode the responses_api_response_id

         Returns:
-            Tuple of model_id, response_id (from upstream provider)
+            DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id
         """
         try:
             # Remove prefix and decode
@@ -129,16 +138,45 @@ class ResponsesAPIRequestUtils:
             # Parse components using known prefixes
             if ";" not in decoded_id:
-                return None, response_id
+                return DecodedResponseId(
+                    custom_llm_provider=None,
+                    model_id=None,
+                    response_id=response_id,
+                )

-            model_part, response_part = decoded_id.split(";", 1)
-            model_id = model_part.replace("litellm:model_id:", "")
-            decoded_response_id = response_part.replace("response_id:", "")
-
-            return model_id, decoded_response_id
+            parts = decoded_id.split(";")
+
+            # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
+            custom_llm_provider = None
+            model_id = None
+
+            if (
+                len(parts) >= 3
+            ):  # Full format with custom_llm_provider, model_id, and response_id
+                custom_llm_provider_part = parts[0]
+                model_id_part = parts[1]
+                response_part = parts[2]
+
+                custom_llm_provider = custom_llm_provider_part.replace(
+                    "litellm:custom_llm_provider:", ""
+                )
+                model_id = model_id_part.replace("model_id:", "")
+                decoded_response_id = response_part.replace("response_id:", "")
+            else:
+                decoded_response_id = response_id
+
+            return DecodedResponseId(
+                custom_llm_provider=custom_llm_provider,
+                model_id=model_id,
+                response_id=decoded_response_id,
+            )
         except Exception as e:
             verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
-            return None, response_id
+            return DecodedResponseId(
+                custom_llm_provider=None,
+                model_id=None,
+                response_id=response_id,
+            )


 class ResponseAPILoggingUtils:
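
For reference, a self-contained sketch of the composite-id round trip these helpers implement (the format string mirrors the SpecialEnums value changed later in this diff; the real helpers also wrap the encoded value in a litellm-managed prefix, which is omitted here):

import base64

COMPLETE_STR = "litellm:custom_llm_provider:{};model_id:{};response_id:{}"

def build_id(custom_llm_provider: str, model_id: str, response_id: str) -> str:
    # Assemble the composite string, then base64-encode it.
    assembled = COMPLETE_STR.format(custom_llm_provider, model_id, response_id)
    return base64.b64encode(assembled.encode("utf-8")).decode("utf-8")

def decode_id(encoded: str) -> dict:
    # Reverse the encoding and strip the known key prefixes.
    decoded = base64.b64decode(encoded.encode("utf-8")).decode("utf-8")
    parts = decoded.split(";")
    if len(parts) >= 3:
        return {
            "custom_llm_provider": parts[0].replace("litellm:custom_llm_provider:", ""),
            "model_id": parts[1].replace("model_id:", ""),
            "response_id": parts[2].replace("response_id:", ""),
        }
    return {"custom_llm_provider": None, "model_id": None, "response_id": encoded}

# Illustrative values only.
print(decode_id(build_id("openai", "gpt-4o-deployment-1", "resp_abc123")))
# {'custom_llm_provider': 'openai', 'model_id': 'gpt-4o-deployment-1', 'response_id': 'resp_abc123'}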

View file

@@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         if previous_response_id is None:
             return healthy_deployments

-        model_id, response_id = (
-            ResponsesAPIRequestUtils._decode_responses_api_response_id(
-                response_id=previous_response_id,
-            )
-        )
+        decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id(
+            response_id=previous_response_id,
+        )
+        model_id = decoded_response.get("model_id")

         if model_id is None:
             return healthy_deployments
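
The decoded model_id is what lets the router pin a request carrying previous_response_id back to the deployment that produced that response. A hypothetical sketch of that filtering idea (the deployment shape and the model_info lookup are assumptions, not shown in this hunk):

from typing import Any, Dict, List

def pin_to_previous_deployment(
    healthy_deployments: List[Dict[str, Any]],
    decoded_response: Dict[str, Any],
) -> List[Dict[str, Any]]:
    model_id = decoded_response.get("model_id")
    if model_id is None:
        # Nothing recoverable from the id; leave routing untouched.
        return healthy_deployments
    # Keep only the deployment whose model_info.id matches (assumed structure).
    return [
        deployment
        for deployment in healthy_deployments
        if deployment.get("model_info", {}).get("id") == model_id
    ]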

View file

@@ -1,5 +1,6 @@
 from typing import Literal

+from pydantic import PrivateAttr
 from typing_extensions import Any, List, Optional, TypedDict

 from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@@ -62,3 +63,14 @@ class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject):
     id: Optional[str]
     object: Optional[str]
     deleted: Optional[bool]
+
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class DecodedResponseId(TypedDict, total=False):
+    """Structure representing a decoded response ID"""
+
+    custom_llm_provider: Optional[str]
+    model_id: Optional[str]
+    response_id: str

View file

@@ -2254,7 +2254,9 @@ class SpecialEnums(Enum):
     LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
     LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"

-    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = (
+        "litellm:custom_llm_provider:{};model_id:{};response_id:{}"
+    )


 LLMResponseTypes = Union[

View file

@@ -516,9 +516,9 @@ def function_setup(  # noqa: PLR0915
     function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

     ## DYNAMIC CALLBACKS ##
-    dynamic_callbacks: Optional[
-        List[Union[str, Callable, CustomLogger]]
-    ] = kwargs.pop("callbacks", None)
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+        kwargs.pop("callbacks", None)
+    )
     all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)

     if len(all_callbacks) > 0:
@@ -1202,9 +1202,9 @@ def client(original_function):  # noqa: PLR0915
                         exception=e,
                         retry_policy=kwargs.get("retry_policy"),
                     )
-                    kwargs[
-                        "retry_policy"
-                    ] = reset_retry_policy()  # prevent infinite loops
+                    kwargs["retry_policy"] = (
+                        reset_retry_policy()
+                    )  # prevent infinite loops
                     litellm.num_retries = (
                         None  # set retries to None to prevent infinite loops
                     )
@@ -3028,16 +3028,16 @@ def get_optional_params(  # noqa: PLR0915
                 True  # so that main.py adds the function call to the prompt
             )
             if "tools" in non_default_params:
-                optional_params[
-                    "functions_unsupported_model"
-                ] = non_default_params.pop("tools")
+                optional_params["functions_unsupported_model"] = (
+                    non_default_params.pop("tools")
+                )
                 non_default_params.pop(
                     "tool_choice", None
                 )  # causes ollama requests to hang
             elif "functions" in non_default_params:
-                optional_params[
-                    "functions_unsupported_model"
-                ] = non_default_params.pop("functions")
+                optional_params["functions_unsupported_model"] = (
+                    non_default_params.pop("functions")
+                )
         elif (
             litellm.add_function_to_prompt
         ):  # if user opts to add it to prompt instead
@@ -3060,11 +3060,11 @@ def get_optional_params(  # noqa: PLR0915
     if "response_format" in non_default_params:
         if provider_config is not None:
-            non_default_params[
-                "response_format"
-            ] = provider_config.get_json_schema_from_pydantic_object(
-                response_format=non_default_params["response_format"]
-            )
+            non_default_params["response_format"] = (
+                provider_config.get_json_schema_from_pydantic_object(
+                    response_format=non_default_params["response_format"]
+                )
+            )
         else:
             non_default_params["response_format"] = type_to_response_format_param(
                 response_format=non_default_params["response_format"]
@@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int:

 def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
-    _choices: Union[
-        List[Union[Choices, StreamingChoices]], List[StreamingChoices]
-    ] = response_obj.choices
+    _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
+        response_obj.choices
+    )

     response_str = ""
     for choice in _choices:
@@ -6625,8 +6625,8 @@ class ProviderConfigManager:
     @staticmethod
     def get_provider_responses_api_config(
-        model: str,
         provider: LlmProviders,
+        model: Optional[str] = None,
     ) -> Optional[BaseResponsesAPIConfig]:
         if litellm.LlmProviders.OPENAI == provider:
             return litellm.OpenAIResponsesAPIConfig()
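
Making model optional on get_provider_responses_api_config matters for the delete path: by the time delete_responses() runs, only the provider recovered from the encoded response id is known. A small sketch of the call as it now looks (provider value is illustrative):

import litellm
from litellm.utils import ProviderConfigManager

config = ProviderConfigManager.get_provider_responses_api_config(
    provider=litellm.LlmProviders.OPENAI,  # model is omitted and defaults to None
)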

View file

@@ -189,6 +189,75 @@ class BaseResponsesAPITest(ABC):
+    @pytest.mark.parametrize("sync_mode", [True, False])
+    @pytest.mark.asyncio
+    async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_call_args()
+        if sync_mode:
+            response = litellm.responses(
+                input="Basic ping", max_output_tokens=20,
+                **base_completion_call_args
+            )
+
+            # delete the response
+            if isinstance(response, ResponsesAPIResponse):
+                litellm.delete_responses(
+                    response_id=response.id,
+                )
+            else:
+                raise ValueError("response is not a ResponsesAPIResponse")
+        # else:
+        #     response = await litellm.aresponses(
+        #         input="Basic ping", max_output_tokens=20,
+        #         **base_completion_call_args
+        #     )
+
+        #     # async delete the response
+        #     await litellm.adelete_responses(
+        #         response_id=response.id,
+        #     )
+
+    # @pytest.mark.parametrize("sync_mode", [True, False])
+    # @pytest.mark.asyncio
+    # async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
+    #     litellm._turn_on_debug()
+    #     litellm.set_verbose = True
+    #     base_completion_call_args = self.get_base_completion_call_args()
+    #     if sync_mode:
+    #         response_id = None
+    #         response = litellm.responses(
+    #             input="Basic ping", max_output_tokens=20,
+    #             stream=True,
+    #             **base_completion_call_args
+    #         )
+    #         for event in response:
+    #             if event.type == "response.completed":
+    #                 response_id = event.response.id
+    #                 break
+
+    #         # delete the response
+    #         litellm.delete_responses(
+    #             response_id=response_id,
+    #         )
+    #     else:
+    #         response = await litellm.aresponses(
+    #             input="Basic ping", max_output_tokens=20,
+    #             stream=True,
+    #             **base_completion_call_args
+    #         )
+    #         async for event in response:
+    #             if event.type == "response.completed":
+    #                 response_id = event.response.id
+    #                 break
+
+    #         # async delete the response
+    #         await litellm.adelete_responses(
+    #             response_id=response_id,
+    #         )
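
For context, a hypothetical concrete subclass showing how the base test above gets exercised; the real suite defines provider-specific subclasses elsewhere, and the model name here is illustrative:

class TestOpenAIResponsesAPI(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {"model": "openai/gpt-4o-mini"}  # illustrative model

Running pytest against a file containing such a subclass executes test_basic_openai_responses_delete_endpoint for both sync_mode values.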