Fixes for deleting responses via the Responses API

2025-04-24 18:24:20 +00:00 · 2025-04-22 12:54:05 -07:00 · 2025-04-22 12:54:05 -07:00 · edebe69ac0
commit edebe69ac0
parent 63bde3dc73
9 changed files with 254 additions and 40 deletions
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@ -1259,10 +1259,10 @@ class BaseLLMHTTPHandler:
    def delete_response_api_handler(
        self,
        response_id: str,
-        custom_llm_provider: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        litellm_params: GenericLiteLLMParams,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str],
        extra_headers: Optional[Dict[str, Any]] = None,
        extra_body: Optional[Dict[str, Any]] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
@ -1313,7 +1313,7 @@ class BaseLLMHTTPHandler:

        try:
            response = sync_httpx_client.delete(
-                url=api_base, headers=headers, data=json.dumps(data), timeout=timeout
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
            )

        except Exception as e:
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@ -230,7 +230,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
        OpenAI API expects the following request
        - DELETE /v1/responses/{response_id}
        """
-        url = f"{api_base}/responses/{response_id}"
+        url = f"{api_base}/{response_id}"
        data = {}
        return url, data

--- a/litellm/responses/main.py
+++ b/litellm/responses/main.py
@ -24,6 +24,7 @@ from litellm.types.llms.openai import (
    ToolChoice,
    ToolParam,
 )
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 from litellm.utils import ProviderConfigManager, client

@ -122,6 +123,7 @@ async def aresponses(
            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                responses_api_response=response,
                kwargs=kwargs,
+                custom_llm_provider=custom_llm_provider,
            )
        return response
    except Exception as e:
@ -260,6 +262,7 @@ def responses(
            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                responses_api_response=response,
                kwargs=kwargs,
+                custom_llm_provider=custom_llm_provider,
            )

        return response
@ -271,3 +274,94 @@ def responses(
            completion_kwargs=local_vars,
            extra_kwargs=kwargs,
        )
+
+
+@client
+def delete_responses(
+    response_id: str,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM-specific params
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+) -> DeleteResponseResult:
+    """
+    Synchronous version of the DELETE Responses API
+
+    DELETE /v1/responses/{response_id} endpoint in the responses API
+
+    """
+    local_vars = locals()
+    try:
+        litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
+        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+        _is_async = kwargs.pop("aresponses", False) is True
+
+        # build GenericLiteLLMParams from the remaining kwargs
+        litellm_params = GenericLiteLLMParams(**kwargs)
+
+        # get custom llm provider from response_id
+        decoded_response_id: DecodedResponseId = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=response_id,
+            )
+        )
+        response_id = decoded_response_id.get("response_id") or response_id
+        custom_llm_provider = (
+            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
+        )
+
+        # get provider config
+        responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+            ProviderConfigManager.get_provider_responses_api_config(
+                model=None,
+                provider=litellm.LlmProviders(custom_llm_provider),
+            )
+        )
+
+        if responses_api_provider_config is None:
+            raise ValueError(
+                f"DELETE responses is not supported for {custom_llm_provider}"
+            )
+
+        local_vars.update(kwargs)
+
+        # Pre Call logging
+        litellm_logging_obj.update_environment_variables(
+            model=None,
+            optional_params={
+                "response_id": response_id,
+            },
+            litellm_params={
+                "litellm_call_id": litellm_call_id,
+            },
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        # Call the handler with _is_async flag instead of directly calling the async handler
+        response = base_llm_http_handler.delete_response_api_handler(
+            response_id=response_id,
+            custom_llm_provider=custom_llm_provider,
+            responses_api_provider_config=responses_api_provider_config,
+            litellm_params=litellm_params,
+            logging_obj=litellm_logging_obj,
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout or request_timeout,
+            _is_async=_is_async,
+            client=kwargs.get("client"),
+        )
+
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=None,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
--- a/litellm/responses/utils.py
+++ b/litellm/responses/utils.py
@ -1,6 +1,8 @@
 import base64
 from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints

+from typing_extensions import TypedDict
+
 import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
@ -9,6 +11,7 @@ from litellm.types.llms.openai import (
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
 )
+from litellm.types.responses.main import DecodedResponseId
 from litellm.types.utils import SpecialEnums, Usage


@ -84,29 +87,35 @@ class ResponsesAPIRequestUtils:
    def _update_responses_api_response_id_with_model_id(
        responses_api_response: ResponsesAPIResponse,
        kwargs: Dict[str, Any],
+        custom_llm_provider: Optional[str],
    ) -> ResponsesAPIResponse:
-        """Update the responses_api_response_id with the model_id"""
+        """
+        Update the responses_api_response_id with model_id and custom_llm_provider
+
+        This builds a composite ID containing the custom LLM provider, model ID, and original response ID
+        """
        litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
        model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
        model_id = model_info.get("id")
        updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
            model_id=model_id,
+            custom_llm_provider=custom_llm_provider,
            response_id=responses_api_response.id,
        )
+
        responses_api_response.id = updated_id
        return responses_api_response

    @staticmethod
    def _build_responses_api_response_id(
+        custom_llm_provider: Optional[str],
        model_id: Optional[str],
        response_id: str,
    ) -> str:
        """Build the responses_api_response_id"""
-        if model_id is None:
-            return response_id
        assembled_id: str = str(
            SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
-        ).format(model_id, response_id)
+        ).format(custom_llm_provider, model_id, response_id)
        base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
            "utf-8"
        )
@ -115,12 +124,12 @@ class ResponsesAPIRequestUtils:
    @staticmethod
    def _decode_responses_api_response_id(
        response_id: str,
-    ) -> Tuple[Optional[str], str]:
+    ) -> DecodedResponseId:
        """
        Decode the responses_api_response_id

        Returns:
-            Tuple of model_id, response_id (from upstream provider)
+            DecodedResponseId: TypedDict with custom_llm_provider, model_id, and response_id
        """
        try:
            # Remove prefix and decode
@ -129,16 +138,45 @@ class ResponsesAPIRequestUtils:

            # Parse components using known prefixes
            if ";" not in decoded_id:
-                return None, response_id
+                return DecodedResponseId(
+                    custom_llm_provider=None,
+                    model_id=None,
+                    response_id=response_id,
+                )

-            model_part, response_part = decoded_id.split(";", 1)
-            model_id = model_part.replace("litellm:model_id:", "")
-            decoded_response_id = response_part.replace("response_id:", "")
+            parts = decoded_id.split(";")

-            return model_id, decoded_response_id
+            # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
+            custom_llm_provider = None
+            model_id = None
+
+            if (
+                len(parts) >= 3
+            ):  # Full format with custom_llm_provider, model_id, and response_id
+                custom_llm_provider_part = parts[0]
+                model_id_part = parts[1]
+                response_part = parts[2]
+
+                custom_llm_provider = custom_llm_provider_part.replace(
+                    "litellm:custom_llm_provider:", ""
+                )
+                model_id = model_id_part.replace("model_id:", "")
+                decoded_response_id = response_part.replace("response_id:", "")
+            else:
+                decoded_response_id = response_id
+
+            return DecodedResponseId(
+                custom_llm_provider=custom_llm_provider,
+                model_id=model_id,
+                response_id=decoded_response_id,
+            )
        except Exception as e:
            verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
-            return None, response_id
+            return DecodedResponseId(
+                custom_llm_provider=None,
+                model_id=None,
+                response_id=response_id,
+            )


 class ResponseAPILoggingUtils:
--- a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py
+++ b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py
@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
        if previous_response_id is None:
            return healthy_deployments

-        model_id, response_id = (
-            ResponsesAPIRequestUtils._decode_responses_api_response_id(
-                response_id=previous_response_id,
-            )
+        decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id(
+            response_id=previous_response_id,
        )
+        model_id = decoded_response.get("model_id")
        if model_id is None:
            return healthy_deployments

--- a/litellm/types/responses/main.py
+++ b/litellm/types/responses/main.py
@ -1,5 +1,6 @@
 from typing import Literal

+from pydantic import PrivateAttr
 from typing_extensions import Any, List, Optional, TypedDict

 from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@ -62,3 +63,14 @@ class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject):
    id: Optional[str]
    object: Optional[str]
    deleted: Optional[bool]
+
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class DecodedResponseId(TypedDict, total=False):
+    """Structure representing a decoded response ID"""
+
+    custom_llm_provider: Optional[str]
+    model_id: Optional[str]
+    response_id: str
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@ -2254,7 +2254,9 @@ class SpecialEnums(Enum):
    LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
    LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"

-    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = (
+        "litellm:custom_llm_provider:{};model_id:{};response_id:{}"
+    )


 LLMResponseTypes = Union[
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -516,9 +516,9 @@ def function_setup(  # noqa: PLR0915
        function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

        ## DYNAMIC CALLBACKS ##
-        dynamic_callbacks: Optional[
-            List[Union[str, Callable, CustomLogger]]
-        ] = kwargs.pop("callbacks", None)
+        dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+            kwargs.pop("callbacks", None)
+        )
        all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)

        if len(all_callbacks) > 0:
@ -1202,9 +1202,9 @@ def client(original_function):  # noqa: PLR0915
                        exception=e,
                        retry_policy=kwargs.get("retry_policy"),
                    )
-                    kwargs[
-                        "retry_policy"
-                    ] = reset_retry_policy()  # prevent infinite loops
+                    kwargs["retry_policy"] = (
+                        reset_retry_policy()
+                    )  # prevent infinite loops
                litellm.num_retries = (
                    None  # set retries to None to prevent infinite loops
                )
@ -3028,16 +3028,16 @@ def get_optional_params(  # noqa: PLR0915
                    True  # so that main.py adds the function call to the prompt
                )
                if "tools" in non_default_params:
-                    optional_params[
-                        "functions_unsupported_model"
-                    ] = non_default_params.pop("tools")
+                    optional_params["functions_unsupported_model"] = (
+                        non_default_params.pop("tools")
+                    )
                    non_default_params.pop(
                        "tool_choice", None
                    )  # causes ollama requests to hang
                elif "functions" in non_default_params:
-                    optional_params[
-                        "functions_unsupported_model"
-                    ] = non_default_params.pop("functions")
+                    optional_params["functions_unsupported_model"] = (
+                        non_default_params.pop("functions")
+                    )
            elif (
                litellm.add_function_to_prompt
            ):  # if user opts to add it to prompt instead
@ -3060,10 +3060,10 @@ def get_optional_params(  # noqa: PLR0915

    if "response_format" in non_default_params:
        if provider_config is not None:
-            non_default_params[
-                "response_format"
-            ] = provider_config.get_json_schema_from_pydantic_object(
-                response_format=non_default_params["response_format"]
+            non_default_params["response_format"] = (
+                provider_config.get_json_schema_from_pydantic_object(
+                    response_format=non_default_params["response_format"]
+                )
            )
        else:
            non_default_params["response_format"] = type_to_response_format_param(
@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int:


 def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
-    _choices: Union[
-        List[Union[Choices, StreamingChoices]], List[StreamingChoices]
-    ] = response_obj.choices
+    _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
+        response_obj.choices
+    )

    response_str = ""
    for choice in _choices:
@ -6625,8 +6625,8 @@ class ProviderConfigManager:

    @staticmethod
    def get_provider_responses_api_config(
-        model: str,
        provider: LlmProviders,
+        model: Optional[str] = None,
    ) -> Optional[BaseResponsesAPIConfig]:
        if litellm.LlmProviders.OPENAI == provider:
            return litellm.OpenAIResponsesAPIConfig()
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@ -189,6 +189,75 @@ class BaseResponsesAPITest(ABC):



+    @pytest.mark.parametrize("sync_mode", [True, False])
+    @pytest.mark.asyncio
+    async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_call_args()
+        if sync_mode:
+            response = litellm.responses(
+                input="Basic ping", max_output_tokens=20,
+                **base_completion_call_args
+            )
+
+            # delete the response
+            if isinstance(response, ResponsesAPIResponse):
+                litellm.delete_responses(
+                    response_id=response.id,
+                )
+            else:
+                raise ValueError("response is not a ResponsesAPIResponse")
+        # else:
+        #     response = await litellm.aresponses(
+        #         input="Basic ping", max_output_tokens=20,
+        #         **base_completion_call_args
+        #     )
+
+        #     # async delete the response
+        #     await litellm.adelete_responses(
+        #         response_id=response.id,
+        #     )
+    
+
+    # @pytest.mark.parametrize("sync_mode", [True, False])
+    # @pytest.mark.asyncio
+    # async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
+    #     litellm._turn_on_debug()
+    #     litellm.set_verbose = True
+    #     base_completion_call_args = self.get_base_completion_call_args()
+    #     if sync_mode:
+    #         response_id = None
+    #         response = litellm.responses(
+    #             input="Basic ping", max_output_tokens=20,
+    #             stream=True,
+    #             **base_completion_call_args
+    #         )
+    #         for event in response:
+    #             if event.type == "response.completed":
+    #                 response_id = event.response.id
+    #                 break
+
+    #         # delete the response
+    #         litellm.delete_responses(
+    #             response_id=response_id,
+    #         )
+    #     else:
+    #         response = await litellm.aresponses(
+    #             input="Basic ping", max_output_tokens=20,
+    #             stream=True,
+    #             **base_completion_call_args
+    #         )
+    #         async for event in response:
+    #             if event.type == "response.completed":
+    #                 response_id = event.response.id
+    #                 break
+
+    #         # async delete the response
+    #         await litellm.adelete_responses(
+    #             response_id=response_id,
+    #         )
+