diff --git a/litellm/llms/azure/responses/transformation.py b/litellm/llms/azure/responses/transformation.py index a85ba73bec..499d21cb0e 100644 --- a/litellm/llms/azure/responses/transformation.py +++ b/litellm/llms/azure/responses/transformation.py @@ -1,11 +1,14 @@ -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast import httpx import litellm +from litellm._logging import verbose_logger from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import * +from litellm.types.responses.main import * +from litellm.types.router import GenericLiteLLMParams from litellm.utils import _add_path_to_api_base if TYPE_CHECKING: @@ -41,11 +44,7 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig): def get_complete_url( self, api_base: Optional[str], - api_key: Optional[str], - model: str, - optional_params: dict, litellm_params: dict, - stream: Optional[bool] = None, ) -> str: """ Constructs a complete URL for the API request. @@ -92,3 +91,48 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig): final_url = httpx.URL(new_url).copy_with(params=query_params) return str(final_url) + + ######################################################### + ########## DELETE RESPONSE API TRANSFORMATION ############## + ######################################################### + def transform_delete_response_api_request( + self, + response_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the delete response API request into a URL and data + + Azure OpenAI API expects the following request: + - DELETE /openai/responses/{response_id}?api-version=xxx + + This function handles URLs with query parameters by inserting the response_id + at the correct location (before any query parameters). 
+ """ + from urllib.parse import urlparse, urlunparse + + # Parse the URL to separate its components + parsed_url = urlparse(api_base) + + # Insert the response_id at the end of the path component + # Remove trailing slash if present to avoid double slashes + path = parsed_url.path.rstrip("/") + new_path = f"{path}/{response_id}" + + # Reconstruct the URL with all original components but with the modified path + delete_url = urlunparse( + ( + parsed_url.scheme, # http, https + parsed_url.netloc, # domain name, port + new_path, # path with response_id added + parsed_url.params, # parameters + parsed_url.query, # query string + parsed_url.fragment, # fragment + ) + ) + + data: Dict = {} + verbose_logger.debug(f"delete response url={delete_url}") + return delete_url, data diff --git a/litellm/llms/base_llm/responses/transformation.py b/litellm/llms/base_llm/responses/transformation.py index 649b91226f..15ce8cba3f 100644 --- a/litellm/llms/base_llm/responses/transformation.py +++ b/litellm/llms/base_llm/responses/transformation.py @@ -1,6 +1,6 @@ import types from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union import httpx @@ -10,6 +10,7 @@ from litellm.types.llms.openai import ( ResponsesAPIResponse, ResponsesAPIStreamingResponse, ) +from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams if TYPE_CHECKING: @@ -73,11 +74,7 @@ class BaseResponsesAPIConfig(ABC): def get_complete_url( self, api_base: Optional[str], - api_key: Optional[str], - model: str, - optional_params: dict, litellm_params: dict, - stream: Optional[bool] = None, ) -> str: """ OPTIONAL @@ -122,6 +119,31 @@ class BaseResponsesAPIConfig(ABC): """ pass + ######################################################### + ########## DELETE RESPONSE API TRANSFORMATION ############## + ######################################################### + @abstractmethod + def transform_delete_response_api_request( + self, + response_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + pass + + @abstractmethod + def transform_delete_response_api_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> DeleteResponseResult: + pass + + ######################################################### + ########## END DELETE RESPONSE API TRANSFORMATION ########## + ######################################################### + def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index 627dd8c9f9..f99e04ab9d 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -650,6 +650,49 @@ class HTTPHandler: except Exception as e: raise e + def delete( + self, + url: str, + data: Optional[Union[dict, str]] = None, # type: ignore + json: Optional[dict] = None, + params: Optional[dict] = None, + headers: Optional[dict] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + stream: bool = False, + ): + try: + if timeout is not None: + req = self.client.build_request( + "DELETE", url, data=data, json=json, params=params, headers=headers, timeout=timeout # type: ignore + ) + else: + req = self.client.build_request( + "DELETE", url, data=data, json=json, params=params, headers=headers # type: ignore + ) + 
response = self.client.send(req, stream=stream) + response.raise_for_status() + return response + except httpx.TimeoutException: + raise litellm.Timeout( + message=f"Connection timed out after {timeout} seconds.", + model="default-model-name", + llm_provider="litellm-httpx-handler", + ) + except httpx.HTTPStatusError as e: + if stream is True: + setattr(e, "message", mask_sensitive_info(e.response.read())) + setattr(e, "text", mask_sensitive_info(e.response.read())) + else: + error_text = mask_sensitive_info(e.response.text) + setattr(e, "message", error_text) + setattr(e, "text", error_text) + + setattr(e, "status_code", e.response.status_code) + + raise e + except Exception as e: + raise e + def __del__(self) -> None: try: self.close() diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index c7b18215d0..1958ef0b60 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -36,6 +36,7 @@ from litellm.types.llms.openai import ( ResponsesAPIResponse, ) from litellm.types.rerank import OptionalRerankParams, RerankResponse +from litellm.types.responses.main import DeleteResponseResult from litellm.types.router import GenericLiteLLMParams from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager @@ -1015,6 +1016,7 @@ class BaseLLMHTTPHandler: client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, fake_stream: bool = False, + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, @@ -1041,6 +1043,7 @@ class BaseLLMHTTPHandler: timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, fake_stream=fake_stream, + litellm_metadata=litellm_metadata, ) if client is None or not isinstance(client, HTTPHandler): @@ -1064,11 +1067,7 @@ class BaseLLMHTTPHandler: api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, - api_key=litellm_params.api_key, - model=model, - optional_params=response_api_optional_request_params, litellm_params=dict(litellm_params), - stream=stream, ) data = responses_api_provider_config.transform_responses_api_request( @@ -1113,6 +1112,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) return SyncResponsesAPIStreamingIterator( @@ -1120,6 +1121,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) else: # For non-streaming requests @@ -1156,6 +1159,7 @@ class BaseLLMHTTPHandler: timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, fake_stream: bool = False, + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. 
@@ -1183,11 +1187,7 @@ class BaseLLMHTTPHandler: api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, - api_key=litellm_params.api_key, - model=model, - optional_params=response_api_optional_request_params, litellm_params=dict(litellm_params), - stream=stream, ) data = responses_api_provider_config.transform_responses_api_request( @@ -1234,6 +1234,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) # Return the streaming iterator @@ -1242,6 +1244,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) else: # For non-streaming, proceed as before @@ -1265,6 +1269,163 @@ class BaseLLMHTTPHandler: logging_obj=logging_obj, ) + async def async_delete_response_api_handler( + self, + response_id: str, + responses_api_provider_config: BaseResponsesAPIConfig, + litellm_params: GenericLiteLLMParams, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str], + extra_headers: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + _is_async: bool = False, + ) -> DeleteResponseResult: + """ + Async version of the delete response API handler. + Uses async HTTP client to make requests. + """ + if client is None or not isinstance(client, AsyncHTTPHandler): + async_httpx_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders(custom_llm_provider), + params={"ssl_verify": litellm_params.get("ssl_verify", None)}, + ) + else: + async_httpx_client = client + + headers = responses_api_provider_config.validate_environment( + api_key=litellm_params.api_key, + headers=extra_headers or {}, + model="None", + ) + + if extra_headers: + headers.update(extra_headers) + + api_base = responses_api_provider_config.get_complete_url( + api_base=litellm_params.api_base, + litellm_params=dict(litellm_params), + ) + + url, data = responses_api_provider_config.transform_delete_response_api_request( + response_id=response_id, + api_base=api_base, + litellm_params=litellm_params, + headers=headers, + ) + + ## LOGGING + logging_obj.pre_call( + input=input, + api_key="", + additional_args={ + "complete_input_dict": data, + "api_base": api_base, + "headers": headers, + }, + ) + + try: + response = await async_httpx_client.delete( + url=url, headers=headers, data=json.dumps(data), timeout=timeout + ) + + except Exception as e: + raise self._handle_error( + e=e, + provider_config=responses_api_provider_config, + ) + + return responses_api_provider_config.transform_delete_response_api_response( + raw_response=response, + logging_obj=logging_obj, + ) + + def delete_response_api_handler( + self, + response_id: str, + responses_api_provider_config: BaseResponsesAPIConfig, + litellm_params: GenericLiteLLMParams, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str], + extra_headers: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + _is_async: bool = False, + ) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]: + """ + Async 
or sync handler for the DELETE Responses API.
+        Routes to the async implementation when _is_async is True; otherwise uses a sync HTTP client.
+        """
+        if _is_async:
+            return self.async_delete_response_api_handler(
+                response_id=response_id,
+                responses_api_provider_config=responses_api_provider_config,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client,
+            )
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=extra_headers or {},
+            model="None",
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, data = responses_api_provider_config.transform_delete_response_api_request(
+            response_id=response_id,
+            api_base=api_base,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.delete(
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
+            )
+
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_delete_response_api_response(
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
     def create_file(
         self,
         create_file_data: CreateFileRequest,
diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
index 047572657c..d4a443aedb 100644
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@@ -7,6 +7,7 @@ from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 
 from ..common_utils import OpenAIError
@@ -110,11 +111,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
     def get_complete_url(
         self,
         api_base: Optional[str],
-        api_key: Optional[str],
-        model: str,
-        optional_params: dict,
         litellm_params: dict,
-        stream: Optional[bool] = None,
     ) -> str:
         """
         Get the endpoint for OpenAI responses API
@@ -217,3 +214,39 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
                 f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
             )
             return False
+
+    #########################################################
+    ########## DELETE RESPONSE API TRANSFORMATION ##############
+    #########################################################
+    def transform_delete_response_api_request(
+        self,
+        response_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Transform the delete response API request into a URL and data
+
+        OpenAI API expects the following request
+        - DELETE /v1/responses/{response_id}
+        """
+        url = f"{api_base}/{response_id}"
+        data: Dict = {}
+        return url, data
+
+    def transform_delete_response_api_response(
+        self,
+        raw_response:
httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> DeleteResponseResult: + """ + Transform the delete response API response into a DeleteResponseResult + """ + try: + raw_response_json = raw_response.json() + except Exception: + raise OpenAIError( + message=raw_response.text, status_code=raw_response.status_code + ) + return DeleteResponseResult(**raw_response_json) diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 2d7426205e..004a19a0ae 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -1,7 +1,7 @@ import asyncio import contextvars from functools import partial -from typing import Any, Dict, Iterable, List, Literal, Optional, Union +from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union import httpx @@ -24,6 +24,7 @@ from litellm.types.llms.openai import ( ToolChoice, ToolParam, ) +from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams from litellm.utils import ProviderConfigManager, client @@ -121,7 +122,8 @@ async def aresponses( if isinstance(response, ResponsesAPIResponse): response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, - kwargs=kwargs, + litellm_metadata=kwargs.get("litellm_metadata", {}), + custom_llm_provider=custom_llm_provider, ) return response except Exception as e: @@ -253,13 +255,15 @@ def responses( fake_stream=responses_api_provider_config.should_fake_stream( model=model, stream=stream, custom_llm_provider=custom_llm_provider ), + litellm_metadata=kwargs.get("litellm_metadata", {}), ) # Update the responses_api_response_id with the model_id if isinstance(response, ResponsesAPIResponse): response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, - kwargs=kwargs, + litellm_metadata=kwargs.get("litellm_metadata", {}), + custom_llm_provider=custom_llm_provider, ) return response @@ -271,3 +275,162 @@ def responses( completion_kwargs=local_vars, extra_kwargs=kwargs, ) + + +@client +async def adelete_responses( + response_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Optional[Dict[str, Any]] = None, + extra_query: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + # LiteLLM specific params, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> DeleteResponseResult: + """ + Async version of the DELETE Responses API + + DELETE /v1/responses/{response_id} endpoint in the responses API + + """ + local_vars = locals() + try: + loop = asyncio.get_event_loop() + kwargs["adelete_responses"] = True + + # get custom llm provider from response_id + decoded_response_id: DecodedResponseId = ( + ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=response_id, + ) + ) + response_id = decoded_response_id.get("response_id") or response_id + custom_llm_provider = ( + decoded_response_id.get("custom_llm_provider") or custom_llm_provider + ) + + func = partial( + delete_responses, + response_id=response_id, + custom_llm_provider=custom_llm_provider, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **kwargs, + ) + + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response + return response + except Exception as e: + raise litellm.exception_type( + model=None, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) + + +@client +def delete_responses( + response_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Optional[Dict[str, Any]] = None, + extra_query: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + # LiteLLM specific params, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]: + """ + Synchronous version of the DELETE Responses API + + DELETE /v1/responses/{response_id} endpoint in the responses API + + """ + local_vars = locals() + try: + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore + litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) + _is_async = kwargs.pop("adelete_responses", False) is True + + # get llm provider logic + litellm_params = GenericLiteLLMParams(**kwargs) + + # get custom llm provider from response_id + decoded_response_id: DecodedResponseId = ( + ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=response_id, + ) + ) + response_id = decoded_response_id.get("response_id") or response_id + custom_llm_provider = ( + decoded_response_id.get("custom_llm_provider") or custom_llm_provider + ) + + if custom_llm_provider is None: + raise ValueError("custom_llm_provider is required but passed as None") + + # get provider config + responses_api_provider_config: Optional[BaseResponsesAPIConfig] = ( + ProviderConfigManager.get_provider_responses_api_config( + model=None, + provider=litellm.LlmProviders(custom_llm_provider), + ) + ) + + if responses_api_provider_config is None: + raise ValueError( + f"DELETE responses is not supported for {custom_llm_provider}" + ) + + local_vars.update(kwargs) + + # Pre Call logging + litellm_logging_obj.update_environment_variables( + model=None, + optional_params={ + "response_id": response_id, + }, + litellm_params={ + "litellm_call_id": litellm_call_id, + }, + custom_llm_provider=custom_llm_provider, + ) + + # Call the handler with _is_async flag instead of directly calling the async handler + response = base_llm_http_handler.delete_response_api_handler( + response_id=response_id, + custom_llm_provider=custom_llm_provider, + responses_api_provider_config=responses_api_provider_config, + litellm_params=litellm_params, + logging_obj=litellm_logging_obj, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout or request_timeout, + _is_async=_is_async, + client=kwargs.get("client"), + ) + + return response + except Exception as e: + raise litellm.exception_type( + model=None, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py index e050c47080..3e12761ba0 100644 --- a/litellm/responses/streaming_iterator.py +++ b/litellm/responses/streaming_iterator.py @@ -1,7 +1,7 @@ import asyncio import json from datetime import datetime -from typing import Optional +from typing import Any, Dict, Optional import httpx @@ -10,6 +10,7 @@ from litellm.litellm_core_utils.asyncify import run_async_function from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig +from litellm.responses.utils import ResponsesAPIRequestUtils from litellm.types.llms.openai import ( OutputTextDeltaEvent, ResponseCompletedEvent, @@ -33,6 +34,8 @@ class 
BaseResponsesAPIStreamingIterator: model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): self.response = response self.model = model @@ -42,6 +45,10 @@ class BaseResponsesAPIStreamingIterator: self.completed_response: Optional[ResponsesAPIStreamingResponse] = None self.start_time = datetime.now() + # set request kwargs + self.litellm_metadata = litellm_metadata + self.custom_llm_provider = custom_llm_provider + def _process_chunk(self, chunk): """Process a single chunk of data from the stream""" if not chunk: @@ -70,6 +77,17 @@ class BaseResponsesAPIStreamingIterator: logging_obj=self.logging_obj, ) ) + + # if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider + response_object = getattr(openai_responses_api_chunk, "response", None) + if response_object: + response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( + responses_api_response=response_object, + litellm_metadata=self.litellm_metadata, + custom_llm_provider=self.custom_llm_provider, + ) + setattr(openai_responses_api_chunk, "response", response) + # Store the completed response if ( openai_responses_api_chunk @@ -102,8 +120,17 @@ class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): - super().__init__(response, model, responses_api_provider_config, logging_obj) + super().__init__( + response, + model, + responses_api_provider_config, + logging_obj, + litellm_metadata, + custom_llm_provider, + ) self.stream_iterator = response.aiter_lines() def __aiter__(self): @@ -163,8 +190,17 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): - super().__init__(response, model, responses_api_provider_config, logging_obj) + super().__init__( + response, + model, + responses_api_provider_config, + logging_obj, + litellm_metadata, + custom_llm_provider, + ) self.stream_iterator = response.iter_lines() def __iter__(self): @@ -228,12 +264,16 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): super().__init__( response=response, model=model, responses_api_provider_config=responses_api_provider_config, logging_obj=logging_obj, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) # one-time transform diff --git a/litellm/responses/utils.py b/litellm/responses/utils.py index 5e95cbd93a..9fa455de71 100644 --- a/litellm/responses/utils.py +++ b/litellm/responses/utils.py @@ -1,5 +1,5 @@ import base64 -from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints +from typing import Any, Dict, Optional, Union, cast, get_type_hints import litellm from litellm._logging import verbose_logger @@ -9,6 +9,7 @@ from litellm.types.llms.openai import ( ResponsesAPIOptionalRequestParams, ResponsesAPIResponse, ) +from litellm.types.responses.main import 
DecodedResponseId from litellm.types.utils import SpecialEnums, Usage @@ -83,30 +84,36 @@ class ResponsesAPIRequestUtils: @staticmethod def _update_responses_api_response_id_with_model_id( responses_api_response: ResponsesAPIResponse, - kwargs: Dict[str, Any], + custom_llm_provider: Optional[str], + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> ResponsesAPIResponse: - """Update the responses_api_response_id with the model_id""" - litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {} + """ + Update the responses_api_response_id with model_id and custom_llm_provider + + This builds a composite ID containing the custom LLM provider, model ID, and original response ID + """ + litellm_metadata = litellm_metadata or {} model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id") updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id( model_id=model_id, + custom_llm_provider=custom_llm_provider, response_id=responses_api_response.id, ) + responses_api_response.id = updated_id return responses_api_response @staticmethod def _build_responses_api_response_id( + custom_llm_provider: Optional[str], model_id: Optional[str], response_id: str, ) -> str: """Build the responses_api_response_id""" - if model_id is None: - return response_id assembled_id: str = str( SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value - ).format(model_id, response_id) + ).format(custom_llm_provider, model_id, response_id) base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode( "utf-8" ) @@ -115,12 +122,12 @@ class ResponsesAPIRequestUtils: @staticmethod def _decode_responses_api_response_id( response_id: str, - ) -> Tuple[Optional[str], str]: + ) -> DecodedResponseId: """ Decode the responses_api_response_id Returns: - Tuple of model_id, response_id (from upstream provider) + DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id """ try: # Remove prefix and decode @@ -129,16 +136,45 @@ class ResponsesAPIRequestUtils: # Parse components using known prefixes if ";" not in decoded_id: - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) - model_part, response_part = decoded_id.split(";", 1) - model_id = model_part.replace("litellm:model_id:", "") - decoded_response_id = response_part.replace("response_id:", "") + parts = decoded_id.split(";") - return model_id, decoded_response_id + # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{} + custom_llm_provider = None + model_id = None + + if ( + len(parts) >= 3 + ): # Full format with custom_llm_provider, model_id, and response_id + custom_llm_provider_part = parts[0] + model_id_part = parts[1] + response_part = parts[2] + + custom_llm_provider = custom_llm_provider_part.replace( + "litellm:custom_llm_provider:", "" + ) + model_id = model_id_part.replace("model_id:", "") + decoded_response_id = response_part.replace("response_id:", "") + else: + decoded_response_id = response_id + + return DecodedResponseId( + custom_llm_provider=custom_llm_provider, + model_id=model_id, + response_id=decoded_response_id, + ) except Exception as e: verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}") - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) class ResponseAPILoggingUtils: diff --git 
a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py index 445460c237..b030fc28c8 100644 --- a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py +++ b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py @@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger): if previous_response_id is None: return healthy_deployments - model_id, response_id = ( - ResponsesAPIRequestUtils._decode_responses_api_response_id( - response_id=previous_response_id, - ) + decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=previous_response_id, ) + model_id = decoded_response.get("model_id") if model_id is None: return healthy_deployments diff --git a/litellm/types/responses/main.py b/litellm/types/responses/main.py index 63a548bbfd..b85df206bc 100644 --- a/litellm/types/responses/main.py +++ b/litellm/types/responses/main.py @@ -1,5 +1,6 @@ from typing import Literal +from pydantic import PrivateAttr from typing_extensions import Any, List, Optional, TypedDict from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject @@ -46,3 +47,30 @@ class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject): status: str # "completed", "in_progress", etc. role: str # "assistant", "user", etc. content: List[OutputText] + + +class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject): + """ + Result of a delete response request + + { + "id": "resp_6786a1bec27481909a17d673315b29f6", + "object": "response", + "deleted": true + } + """ + + id: Optional[str] + object: Optional[str] + deleted: Optional[bool] + + # Define private attributes using PrivateAttr + _hidden_params: dict = PrivateAttr(default_factory=dict) + + +class DecodedResponseId(TypedDict, total=False): + """Structure representing a decoded response ID""" + + custom_llm_provider: Optional[str] + model_id: Optional[str] + response_id: str diff --git a/litellm/types/utils.py b/litellm/types/utils.py index e9859513b9..532162e60f 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2254,7 +2254,9 @@ class SpecialEnums(Enum): LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy" LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}" - LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}" + LITELLM_MANAGED_RESPONSE_COMPLETE_STR = ( + "litellm:custom_llm_provider:{};model_id:{};response_id:{}" + ) LLMResponseTypes = Union[ diff --git a/litellm/utils.py b/litellm/utils.py index 38e604943a..0150c4f43f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, CustomLogger]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3028,16 +3028,16 @@ def get_optional_params( # noqa: 
PLR0915 True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -3060,10 +3060,10 @@ def get_optional_params( # noqa: PLR0915 if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int: def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str: - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) response_str = "" for choice in _choices: @@ -6625,8 +6625,8 @@ class ProviderConfigManager: @staticmethod def get_provider_responses_api_config( - model: str, provider: LlmProviders, + model: Optional[str] = None, ) -> Optional[BaseResponsesAPIConfig]: if litellm.LlmProviders.OPENAI == provider: return litellm.OpenAIResponsesAPIConfig() diff --git a/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py index 202d0aea23..3b9ae72da7 100644 --- a/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py +++ b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py @@ -203,9 +203,6 @@ class TestOpenAIResponsesAPIConfig: result = self.config.get_complete_url( api_base=api_base, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -215,9 +212,6 @@ class TestOpenAIResponsesAPIConfig: with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -231,9 +225,6 @@ class TestOpenAIResponsesAPIConfig: ): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -247,9 +238,6 @@ class TestOpenAIResponsesAPIConfig: ): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -260,9 +248,6 @@ class TestOpenAIResponsesAPIConfig: result = self.config.get_complete_url( api_base=api_base, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py index fd39c13604..905b9b3219 100644 --- 
a/tests/llm_responses_api_testing/base_responses_api.py +++ b/tests/llm_responses_api_testing/base_responses_api.py @@ -189,6 +189,90 @@ class BaseResponsesAPITest(ABC): + @pytest.mark.parametrize("sync_mode", [False, True]) + @pytest.mark.asyncio + async def test_basic_openai_responses_delete_endpoint(self, sync_mode): + litellm._turn_on_debug() + litellm.set_verbose = True + base_completion_call_args = self.get_base_completion_call_args() + if sync_mode: + response = litellm.responses( + input="Basic ping", max_output_tokens=20, + **base_completion_call_args + ) + + # delete the response + if isinstance(response, ResponsesAPIResponse): + litellm.delete_responses( + response_id=response.id, + **base_completion_call_args + ) + else: + raise ValueError("response is not a ResponsesAPIResponse") + else: + response = await litellm.aresponses( + input="Basic ping", max_output_tokens=20, + **base_completion_call_args + ) + + # async delete the response + if isinstance(response, ResponsesAPIResponse): + await litellm.adelete_responses( + response_id=response.id, + **base_completion_call_args + ) + else: + raise ValueError("response is not a ResponsesAPIResponse") + + + @pytest.mark.parametrize("sync_mode", [True, False]) + @pytest.mark.asyncio + async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode): + #litellm._turn_on_debug() + #litellm.set_verbose = True + base_completion_call_args = self.get_base_completion_call_args() + response_id = None + if sync_mode: + response_id = None + response = litellm.responses( + input="Basic ping", max_output_tokens=20, + stream=True, + **base_completion_call_args + ) + for event in response: + print("litellm response=", json.dumps(event, indent=4, default=str)) + if "response" in event: + response_obj = event.get("response") + if response_obj is not None: + response_id = response_obj.get("id") + print("got response_id=", response_id) + + # delete the response + assert response_id is not None + litellm.delete_responses( + response_id=response_id, + **base_completion_call_args + ) + else: + response = await litellm.aresponses( + input="Basic ping", max_output_tokens=20, + stream=True, + **base_completion_call_args + ) + async for event in response: + print("litellm response=", json.dumps(event, indent=4, default=str)) + if "response" in event: + response_obj = event.get("response") + if response_obj is not None: + response_id = response_obj.get("id") + print("got response_id=", response_id) + + # delete the response + assert response_id is not None + await litellm.adelete_responses( + response_id=response_id, + **base_completion_call_args + ) diff --git a/tests/llm_responses_api_testing/test_anthropic_responses_api.py b/tests/llm_responses_api_testing/test_anthropic_responses_api.py index 0fcb771f73..b02c9b8d11 100644 --- a/tests/llm_responses_api_testing/test_anthropic_responses_api.py +++ b/tests/llm_responses_api_testing/test_anthropic_responses_api.py @@ -29,6 +29,12 @@ class TestAnthropicResponsesAPITest(BaseResponsesAPITest): return { "model": "anthropic/claude-3-5-sonnet-latest", } + + async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False): + pass + + async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False): + pass def test_multiturn_tool_calls():
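
Usage sketch for the delete endpoints added above, mirroring the new tests in base_responses_api.py. The model alias and the assumption that an OpenAI key is configured are illustrative only; this is a sketch, not part of the diff.

import asyncio

import litellm
from litellm.types.llms.openai import ResponsesAPIResponse


def sync_roundtrip() -> None:
    # Create a response, then delete it by its (composite) litellm response id.
    response = litellm.responses(
        model="openai/gpt-4o-mini",  # assumed model alias; any Responses-API-capable model works
        input="Basic ping",
        max_output_tokens=20,
    )
    assert isinstance(response, ResponsesAPIResponse)
    result = litellm.delete_responses(response_id=response.id)
    print(result.deleted)  # DeleteResponseResult.deleted -> True on success


async def async_roundtrip() -> None:
    response = await litellm.aresponses(
        model="openai/gpt-4o-mini",
        input="Basic ping",
        max_output_tokens=20,
    )
    assert isinstance(response, ResponsesAPIResponse)
    result = await litellm.adelete_responses(response_id=response.id)
    print(result.deleted)


if __name__ == "__main__":
    sync_roundtrip()
    asyncio.run(async_roundtrip())

Because response.id is the composite litellm ID, no custom_llm_provider argument is needed; delete_responses decodes the provider and the upstream response_id from the ID itself before issuing the DELETE.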
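
A minimal sketch of the composite response-ID format that the delete path relies on. The provider, model id, and response id values below are made up, and only behavior visible in this diff is exercised.

from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.utils import SpecialEnums

# The new format string carries provider + router model id + upstream response id
# before the whole string is base64-encoded into the id returned to callers.
assembled = SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value.format(
    "openai",                                 # custom_llm_provider
    "deployment-1234",                        # hypothetical model_info["id"] from the router
    "resp_6786a1bec27481909a17d673315b29f6",  # upstream response id
)
print(assembled)
# litellm:custom_llm_provider:openai;model_id:deployment-1234;response_id:resp_6786a1bec27481909a17d673315b29f6

# Decoding an id that carries no litellm metadata degrades gracefully to the raw id.
decoded = ResponsesAPIRequestUtils._decode_responses_api_response_id(
    "resp_6786a1bec27481909a17d673315b29f6"
)
assert decoded.get("custom_llm_provider") is None
assert decoded.get("response_id") == "resp_6786a1bec27481909a17d673315b29f6"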