diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index ca4014b578..d62d9d4112 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -1259,10 +1259,10 @@ class BaseLLMHTTPHandler: def delete_response_api_handler( self, response_id: str, - custom_llm_provider: str, responses_api_provider_config: BaseResponsesAPIConfig, litellm_params: GenericLiteLLMParams, logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str], extra_headers: Optional[Dict[str, Any]] = None, extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, @@ -1313,7 +1313,7 @@ class BaseLLMHTTPHandler: try: response = sync_httpx_client.delete( - url=api_base, headers=headers, data=json.dumps(data), timeout=timeout + url=url, headers=headers, data=json.dumps(data), timeout=timeout ) except Exception as e: diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py index 783b22dbb5..8f71ddaa39 100644 --- a/litellm/llms/openai/responses/transformation.py +++ b/litellm/llms/openai/responses/transformation.py @@ -230,7 +230,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): OpenAI API expects the following request - DELETE /v1/responses/{response_id} """ - url = f"{api_base}/responses/{response_id}" + url = f"{api_base}/{response_id}" data = {} return url, data diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 2d7426205e..3e52f80023 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -24,6 +24,7 @@ from litellm.types.llms.openai import ( ToolChoice, ToolParam, ) +from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams from litellm.utils import ProviderConfigManager, client @@ -122,6 +123,7 @@ async def aresponses( response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, kwargs=kwargs, + custom_llm_provider=custom_llm_provider, ) return response except Exception as e: @@ -260,6 +262,7 @@ def responses( response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, kwargs=kwargs, + custom_llm_provider=custom_llm_provider, ) return response @@ -271,3 +274,94 @@ def responses( completion_kwargs=local_vars, extra_kwargs=kwargs, ) + + +@client +def delete_responses( + response_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Optional[Dict[str, Any]] = None, + extra_query: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + # LiteLLM specific params, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> DeleteResponseResult: + """ + Synchronous version of the DELETE Responses API + + DELETE /v1/responses/{response_id} endpoint in the responses API + + """ + local_vars = locals() + try: + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore + litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) + _is_async = kwargs.pop("aresponses", False) is True + + # get llm provider logic + litellm_params = GenericLiteLLMParams(**kwargs) + + # get custom llm provider from response_id + decoded_response_id: DecodedResponseId = ( + ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=response_id, + ) + ) + response_id = decoded_response_id.get("response_id") or response_id + custom_llm_provider = ( + decoded_response_id.get("custom_llm_provider") or custom_llm_provider + ) + + # get provider config + responses_api_provider_config: Optional[BaseResponsesAPIConfig] = ( + ProviderConfigManager.get_provider_responses_api_config( + model=None, + provider=litellm.LlmProviders(custom_llm_provider), + ) + ) + + if responses_api_provider_config is None: + raise ValueError( + f"DELETE responses is not supported for {custom_llm_provider}" + ) + + local_vars.update(kwargs) + + # Pre Call logging + litellm_logging_obj.update_environment_variables( + model=None, + optional_params={ + "response_id": response_id, + }, + litellm_params={ + "litellm_call_id": litellm_call_id, + }, + custom_llm_provider=custom_llm_provider, + ) + + # Call the handler with _is_async flag instead of directly calling the async handler + response = base_llm_http_handler.delete_response_api_handler( + response_id=response_id, + custom_llm_provider=custom_llm_provider, + responses_api_provider_config=responses_api_provider_config, + litellm_params=litellm_params, + logging_obj=litellm_logging_obj, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout or request_timeout, + _is_async=_is_async, + client=kwargs.get("client"), + ) + + return response + except Exception as e: + raise litellm.exception_type( + model=None, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) diff --git a/litellm/responses/utils.py b/litellm/responses/utils.py index 5e95cbd93a..cbcf8efed0 100644 --- a/litellm/responses/utils.py +++ b/litellm/responses/utils.py @@ -1,6 +1,8 @@ import base64 from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints +from typing_extensions import TypedDict + import litellm from litellm._logging import verbose_logger from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig @@ -9,6 +11,7 @@ from litellm.types.llms.openai import ( ResponsesAPIOptionalRequestParams, ResponsesAPIResponse, ) +from litellm.types.responses.main import DecodedResponseId from litellm.types.utils import SpecialEnums, Usage @@ -84,29 +87,35 @@ class ResponsesAPIRequestUtils: def _update_responses_api_response_id_with_model_id( responses_api_response: ResponsesAPIResponse, kwargs: Dict[str, Any], + custom_llm_provider: Optional[str], ) -> ResponsesAPIResponse: - """Update the responses_api_response_id with the model_id""" + """ + Update the responses_api_response_id with 
model_id and custom_llm_provider + + This builds a composite ID containing the custom LLM provider, model ID, and original response ID + """ litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {} model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id") updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id( model_id=model_id, + custom_llm_provider=custom_llm_provider, response_id=responses_api_response.id, ) + responses_api_response.id = updated_id return responses_api_response @staticmethod def _build_responses_api_response_id( + custom_llm_provider: Optional[str], model_id: Optional[str], response_id: str, ) -> str: """Build the responses_api_response_id""" - if model_id is None: - return response_id assembled_id: str = str( SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value - ).format(model_id, response_id) + ).format(custom_llm_provider, model_id, response_id) base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode( "utf-8" ) @@ -115,12 +124,12 @@ class ResponsesAPIRequestUtils: @staticmethod def _decode_responses_api_response_id( response_id: str, - ) -> Tuple[Optional[str], str]: + ) -> DecodedResponseId: """ Decode the responses_api_response_id Returns: - Tuple of model_id, response_id (from upstream provider) + DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id """ try: # Remove prefix and decode @@ -129,16 +138,45 @@ class ResponsesAPIRequestUtils: # Parse components using known prefixes if ";" not in decoded_id: - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) - model_part, response_part = decoded_id.split(";", 1) - model_id = model_part.replace("litellm:model_id:", "") - decoded_response_id = response_part.replace("response_id:", "") + parts = decoded_id.split(";") - return model_id, decoded_response_id + # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{} + custom_llm_provider = None + model_id = None + + if ( + len(parts) >= 3 + ): # Full format with custom_llm_provider, model_id, and response_id + custom_llm_provider_part = parts[0] + model_id_part = parts[1] + response_part = parts[2] + + custom_llm_provider = custom_llm_provider_part.replace( + "litellm:custom_llm_provider:", "" + ) + model_id = model_id_part.replace("model_id:", "") + decoded_response_id = response_part.replace("response_id:", "") + else: + decoded_response_id = response_id + + return DecodedResponseId( + custom_llm_provider=custom_llm_provider, + model_id=model_id, + response_id=decoded_response_id, + ) except Exception as e: verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}") - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) class ResponseAPILoggingUtils: diff --git a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py index 445460c237..b030fc28c8 100644 --- a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py +++ b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py @@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger): if previous_response_id is None: return healthy_deployments - model_id, response_id = ( - ResponsesAPIRequestUtils._decode_responses_api_response_id( - response_id=previous_response_id, - ) + 
decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=previous_response_id, ) + model_id = decoded_response.get("model_id") if model_id is None: return healthy_deployments diff --git a/litellm/types/responses/main.py b/litellm/types/responses/main.py index cf62f0d863..b85df206bc 100644 --- a/litellm/types/responses/main.py +++ b/litellm/types/responses/main.py @@ -1,5 +1,6 @@ from typing import Literal +from pydantic import PrivateAttr from typing_extensions import Any, List, Optional, TypedDict from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject @@ -62,3 +63,14 @@ class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject): id: Optional[str] object: Optional[str] deleted: Optional[bool] + + # Define private attributes using PrivateAttr + _hidden_params: dict = PrivateAttr(default_factory=dict) + + +class DecodedResponseId(TypedDict, total=False): + """Structure representing a decoded response ID""" + + custom_llm_provider: Optional[str] + model_id: Optional[str] + response_id: str diff --git a/litellm/types/utils.py b/litellm/types/utils.py index e9859513b9..532162e60f 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2254,7 +2254,9 @@ class SpecialEnums(Enum): LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy" LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}" - LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}" + LITELLM_MANAGED_RESPONSE_COMPLETE_STR = ( + "litellm:custom_llm_provider:{};model_id:{};response_id:{}" + ) LLMResponseTypes = Union[ diff --git a/litellm/utils.py b/litellm/utils.py index 38e604943a..0150c4f43f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, CustomLogger]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3028,16 +3028,16 @@ def get_optional_params( # noqa: PLR0915 True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -3060,10 +3060,10 @@ def get_optional_params( # noqa: PLR0915 if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - 
response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int: def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str: - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) response_str = "" for choice in _choices: @@ -6625,8 +6625,8 @@ class ProviderConfigManager: @staticmethod def get_provider_responses_api_config( - model: str, provider: LlmProviders, + model: Optional[str] = None, ) -> Optional[BaseResponsesAPIConfig]: if litellm.LlmProviders.OPENAI == provider: return litellm.OpenAIResponsesAPIConfig() diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py index fd39c13604..56caa01f0c 100644 --- a/tests/llm_responses_api_testing/base_responses_api.py +++ b/tests/llm_responses_api_testing/base_responses_api.py @@ -189,6 +189,75 @@ class BaseResponsesAPITest(ABC): + @pytest.mark.parametrize("sync_mode", [True, False]) + @pytest.mark.asyncio + async def test_basic_openai_responses_delete_endpoint(self, sync_mode): + litellm._turn_on_debug() + litellm.set_verbose = True + base_completion_call_args = self.get_base_completion_call_args() + if sync_mode: + response = litellm.responses( + input="Basic ping", max_output_tokens=20, + **base_completion_call_args + ) + + # delete the response + if isinstance(response, ResponsesAPIResponse): + litellm.delete_responses( + response_id=response.id, + ) + else: + raise ValueError("response is not a ResponsesAPIResponse") + # else: + # response = await litellm.aresponses( + # input="Basic ping", max_output_tokens=20, + # **base_completion_call_args + # ) + + # # async delete the response + # await litellm.adelete_responses( + # response_id=response.id, + # ) + + + # @pytest.mark.parametrize("sync_mode", [True, False]) + # @pytest.mark.asyncio + # async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode): + # litellm._turn_on_debug() + # litellm.set_verbose = True + # base_completion_call_args = self.get_base_completion_call_args() + # if sync_mode: + # response_id = None + # response = litellm.responses( + # input="Basic ping", max_output_tokens=20, + # stream=True, + # **base_completion_call_args + # ) + # for event in response: + # if event.type == "response.completed": + # response_id = event.response.id + # break + + # # delete the response + # litellm.delete_responses( + # response_id=response_id, + # ) + # else: + # response = await litellm.aresponses( + # input="Basic ping", max_output_tokens=20, + # stream=True, + # **base_completion_call_args + # ) + # async for event in response: + # if event.type == "response.completed": + # response_id = event.response.id + # break + + # # async delete the response + # await litellm.adelete_responses( + # response_id=response_id, + # ) +
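
Note on the composite response ID (appended after the patch, not part of it): the hunks above widen SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR to "litellm:custom_llm_provider:{};model_id:{};response_id:{}" and teach _decode_responses_api_response_id to recover all three fields, which is what lets delete_responses() infer the provider and the upstream response ID from the composite ID alone. The standalone sketch below mirrors that encode/decode round trip under the assumption that the composite ID is simply the base64 encoding of the formatted string; the function names encode_composite_id / decode_composite_id are illustrative only and are not part of the litellm API.

import base64
from typing import Optional

from typing_extensions import TypedDict


class DecodedId(TypedDict, total=False):
    """Shape of the decoded composite ID, mirroring DecodedResponseId in the patch."""

    custom_llm_provider: Optional[str]
    model_id: Optional[str]
    response_id: str


# Same template string the patch adds to SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.
TEMPLATE = "litellm:custom_llm_provider:{};model_id:{};response_id:{}"


def encode_composite_id(
    provider: Optional[str], model_id: Optional[str], response_id: str
) -> str:
    # Mirrors _build_responses_api_response_id: format the three parts, then base64-encode.
    assembled = TEMPLATE.format(provider, model_id, response_id)
    return base64.b64encode(assembled.encode("utf-8")).decode("utf-8")


def decode_composite_id(composite_id: str) -> DecodedId:
    # Mirrors _decode_responses_api_response_id: base64-decode, split on ";",
    # strip the known prefixes, and fall back to the raw ID on any failure.
    try:
        decoded = base64.b64decode(composite_id.encode("utf-8")).decode("utf-8")
        parts = decoded.split(";")
        if len(parts) >= 3:
            return DecodedId(
                custom_llm_provider=parts[0].replace("litellm:custom_llm_provider:", ""),
                model_id=parts[1].replace("model_id:", ""),
                response_id=parts[2].replace("response_id:", ""),
            )
    except Exception:
        pass
    # Unrecognized or plain provider ID: pass it through untouched, as the patch does.
    return DecodedId(custom_llm_provider=None, model_id=None, response_id=composite_id)


if __name__ == "__main__":
    # Hypothetical values for illustration only.
    composite = encode_composite_id("openai", "my-deployment-id", "resp_abc123")
    print(decode_composite_id(composite))
    # -> {'custom_llm_provider': 'openai', 'model_id': 'my-deployment-id', 'response_id': 'resp_abc123'}

This is also why the router pre-call check in responses_api_deployment_check.py now reads model_id out of the decoded dict instead of unpacking a tuple: the decode step returns a TypedDict so callers such as delete_responses() and the deployment check can each pick the field they need without caring about positional order.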