[Feat] Add Support for DELETE /v1/responses/{response_id} on OpenAI, Azure OpenAI (#10205)

* add transform_delete_response_api_request to base responses config

* add transform_delete_response_api_request

* add delete_response_api_handler

* fixes for deleting responses, response API

* add adelete_responses

* add async test_basic_openai_responses_delete_endpoint

* test_basic_openai_responses_delete_endpoint

* working delete for streaming on responses API

* fixes azure transformation

* TestAnthropicResponsesAPITest

* fix code check

* fix linting

* fixes for get_complete_url

* test_basic_openai_responses_streaming_delete_endpoint

* streaming fixes
This commit is contained in:
Ishaan Jaff 2025-04-22 18:27:03 -07:00 committed by Christian Owusu
parent 9919db2742
commit 586938ed3c
15 changed files with 729 additions and 83 deletions
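A rough usage sketch of the endpoints added in this commit (the model name is a placeholder; delete_responses/adelete_responses mirror the existing responses/aresponses calling conventions):

import litellm

response = litellm.responses(model="openai/gpt-4o", input="Basic ping", max_output_tokens=20)

# sync delete
deleted = litellm.delete_responses(response_id=response.id)
print(deleted.deleted)  # True once the provider confirms deletion

# async delete
# deleted = await litellm.adelete_responses(response_id=response.id)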

View file

@@ -1,11 +1,14 @@
-from typing import TYPE_CHECKING, Any, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast

import httpx

import litellm
+from litellm._logging import verbose_logger
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
+from litellm.types.router import GenericLiteLLMParams
from litellm.utils import _add_path_to_api_base

if TYPE_CHECKING:
@@ -41,11 +44,7 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
def get_complete_url(
self,
api_base: Optional[str],
-api_key: Optional[str],
-model: str,
-optional_params: dict,
litellm_params: dict,
-stream: Optional[bool] = None,
) -> str:
"""
Constructs a complete URL for the API request.
@@ -92,3 +91,48 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
final_url = httpx.URL(new_url).copy_with(params=query_params)

return str(final_url)
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the delete response API request into a URL and data
Azure OpenAI API expects the following request:
- DELETE /openai/responses/{response_id}?api-version=xxx
This function handles URLs with query parameters by inserting the response_id
at the correct location (before any query parameters).
"""
from urllib.parse import urlparse, urlunparse
# Parse the URL to separate its components
parsed_url = urlparse(api_base)
# Insert the response_id at the end of the path component
# Remove trailing slash if present to avoid double slashes
path = parsed_url.path.rstrip("/")
new_path = f"{path}/{response_id}"
# Reconstruct the URL with all original components but with the modified path
delete_url = urlunparse(
(
parsed_url.scheme, # http, https
parsed_url.netloc, # domain name, port
new_path, # path with response_id added
parsed_url.params, # parameters
parsed_url.query, # query string
parsed_url.fragment, # fragment
)
)
data: Dict = {}
verbose_logger.debug(f"delete response url={delete_url}")
return delete_url, data
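A minimal standalone sketch of the same URL rewriting, using a hypothetical Azure api_base to show that the response_id lands before the api-version query string:

from urllib.parse import urlparse, urlunparse

api_base = "https://my-endpoint.openai.azure.com/openai/responses?api-version=2024-05-01-preview"  # hypothetical
response_id = "resp_abc123"  # hypothetical

parsed = urlparse(api_base)
new_path = parsed.path.rstrip("/") + f"/{response_id}"
delete_url = urlunparse(
    (parsed.scheme, parsed.netloc, new_path, parsed.params, parsed.query, parsed.fragment)
)
print(delete_url)
# https://my-endpoint.openai.azure.com/openai/responses/resp_abc123?api-version=2024-05-01-preview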

View file

@@ -1,6 +1,6 @@
import types
from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union

import httpx
@@ -10,6 +10,7 @@ from litellm.types.llms.openai import (
ResponsesAPIResponse,
ResponsesAPIStreamingResponse,
)
+from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams

if TYPE_CHECKING:
@@ -73,11 +74,7 @@ class BaseResponsesAPIConfig(ABC):
def get_complete_url(
self,
api_base: Optional[str],
-api_key: Optional[str],
-model: str,
-optional_params: dict,
litellm_params: dict,
-stream: Optional[bool] = None,
) -> str:
"""
OPTIONAL
@@ -122,6 +119,31 @@ class BaseResponsesAPIConfig(ABC):
"""
pass
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
@abstractmethod
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
pass
@abstractmethod
def transform_delete_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
pass
#########################################################
########## END DELETE RESPONSE API TRANSFORMATION ##########
#########################################################
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@@ -650,6 +650,49 @@ class HTTPHandler:
except Exception as e:
raise e
def delete(
self,
url: str,
data: Optional[Union[dict, str]] = None, # type: ignore
json: Optional[dict] = None,
params: Optional[dict] = None,
headers: Optional[dict] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
stream: bool = False,
):
try:
if timeout is not None:
req = self.client.build_request(
"DELETE", url, data=data, json=json, params=params, headers=headers, timeout=timeout # type: ignore
)
else:
req = self.client.build_request(
"DELETE", url, data=data, json=json, params=params, headers=headers # type: ignore
)
response = self.client.send(req, stream=stream)
response.raise_for_status()
return response
except httpx.TimeoutException:
raise litellm.Timeout(
message=f"Connection timed out after {timeout} seconds.",
model="default-model-name",
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
raise e
except Exception as e:
raise e
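A hedged usage sketch of the new delete() helper; it assumes HTTPHandler is importable from litellm.llms.custom_httpx.http_handler, and the URL and API key are placeholders:

from litellm.llms.custom_httpx.http_handler import HTTPHandler  # assumed import path

client = HTTPHandler()
resp = client.delete(
    url="https://api.openai.com/v1/responses/resp_abc123",  # placeholder response id
    headers={"Authorization": "Bearer sk-..."},
)
print(resp.status_code)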
def __del__(self) -> None:
try:
self.close()

View file

@@ -36,6 +36,7 @@ from litellm.types.llms.openai import (
ResponsesAPIResponse,
)
from litellm.types.rerank import OptionalRerankParams, RerankResponse
+from litellm.types.responses.main import DeleteResponseResult
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
@@ -1015,6 +1016,7 @@ class BaseLLMHTTPHandler:
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
fake_stream: bool = False,
+litellm_metadata: Optional[Dict[str, Any]] = None,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
@@ -1041,6 +1043,7 @@ class BaseLLMHTTPHandler:
timeout=timeout,
client=client if isinstance(client, AsyncHTTPHandler) else None,
fake_stream=fake_stream,
+litellm_metadata=litellm_metadata,
)

if client is None or not isinstance(client, HTTPHandler):
@@ -1064,11 +1067,7 @@ class BaseLLMHTTPHandler:
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
-api_key=litellm_params.api_key,
-model=model,
-optional_params=response_api_optional_request_params,
litellm_params=dict(litellm_params),
-stream=stream,
)

data = responses_api_provider_config.transform_responses_api_request(
@@ -1113,6 +1112,8 @@
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
+litellm_metadata=litellm_metadata,
+custom_llm_provider=custom_llm_provider,
)

return SyncResponsesAPIStreamingIterator(
@@ -1120,6 +1121,8 @@
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
+litellm_metadata=litellm_metadata,
+custom_llm_provider=custom_llm_provider,
)
else:
# For non-streaming requests
@@ -1156,6 +1159,7 @@
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
fake_stream: bool = False,
+litellm_metadata: Optional[Dict[str, Any]] = None,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async version of the responses API handler.
@@ -1183,11 +1187,7 @@
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
-api_key=litellm_params.api_key,
-model=model,
-optional_params=response_api_optional_request_params,
litellm_params=dict(litellm_params),
-stream=stream,
)

data = responses_api_provider_config.transform_responses_api_request(
@@ -1234,6 +1234,8 @@
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
+litellm_metadata=litellm_metadata,
+custom_llm_provider=custom_llm_provider,
)

# Return the streaming iterator
@@ -1242,6 +1244,8 @@
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
+litellm_metadata=litellm_metadata,
+custom_llm_provider=custom_llm_provider,
)
else:
# For non-streaming, proceed as before
@@ -1265,6 +1269,163 @@
logging_obj=logging_obj,
)
async def async_delete_response_api_handler(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str],
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> DeleteResponseResult:
"""
Async version of the delete response API handler.
Uses async HTTP client to make requests.
"""
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_delete_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=input,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = await async_httpx_client.delete(
url=url, headers=headers, data=json.dumps(data), timeout=timeout
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_delete_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
def delete_response_api_handler(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str],
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Sync/async entry point for the delete response API handler.
Dispatches to the async handler when _is_async is True; otherwise uses a sync HTTP client.
"""
if _is_async:
return self.async_delete_response_api_handler(
response_id=response_id,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=logging_obj,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout,
client=client,
)
if client is None or not isinstance(client, HTTPHandler):
sync_httpx_client = _get_httpx_client(
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
)
else:
sync_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_delete_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=input,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = sync_httpx_client.delete(
url=url, headers=headers, data=json.dumps(data), timeout=timeout
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_delete_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
def create_file(
self,
create_file_data: CreateFileRequest,

View file

@@ -7,6 +7,7 @@ from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams

from ..common_utils import OpenAIError
@@ -110,11 +111,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
def get_complete_url(
self,
api_base: Optional[str],
-api_key: Optional[str],
-model: str,
-optional_params: dict,
litellm_params: dict,
-stream: Optional[bool] = None,
) -> str:
"""
Get the endpoint for OpenAI responses API
@@ -217,3 +214,39 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
)
return False
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the delete response API request into a URL and data
OpenAI API expects the following request
- DELETE /v1/responses/{response_id}
"""
url = f"{api_base}/{response_id}"
data: Dict = {}
return url, data
def transform_delete_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
"""
Transform the delete response API response into a DeleteResponseResult
"""
try:
raw_response_json = raw_response.json()
except Exception:
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return DeleteResponseResult(**raw_response_json)
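For reference, a small sketch of how the provider's delete payload maps onto DeleteResponseResult (the JSON values are illustrative; the type is added later in this diff under litellm/types/responses/main.py):

from litellm.types.responses.main import DeleteResponseResult

raw_response_json = {"id": "resp_abc123", "object": "response", "deleted": True}
result = DeleteResponseResult(**raw_response_json)
assert result.deleted is True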

View file

@@ -1,7 +1,7 @@
import asyncio
import contextvars
from functools import partial
-from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union

import httpx
@@ -24,6 +24,7 @@ from litellm.types.llms.openai import (
ToolChoice,
ToolParam,
)
+from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
@@ -121,7 +122,8 @@ async def aresponses(
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
-kwargs=kwargs,
+litellm_metadata=kwargs.get("litellm_metadata", {}),
+custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
@@ -253,13 +255,15 @@ def responses(
fake_stream=responses_api_provider_config.should_fake_stream(
model=model, stream=stream, custom_llm_provider=custom_llm_provider
),
+litellm_metadata=kwargs.get("litellm_metadata", {}),
)

# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
-kwargs=kwargs,
+litellm_metadata=kwargs.get("litellm_metadata", {}),
+custom_llm_provider=custom_llm_provider,
)

return response
@@ -271,3 +275,162 @@
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
async def adelete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> DeleteResponseResult:
"""
Async version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["adelete_responses"] = True
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
func = partial(
delete_responses,
response_id=response_id,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def delete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Synchronous version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("adelete_responses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
if custom_llm_provider is None:
raise ValueError("custom_llm_provider is required but passed as None")
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=None,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise ValueError(
f"DELETE responses is not supported for {custom_llm_provider}"
)
local_vars.update(kwargs)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=None,
optional_params={
"response_id": response_id,
},
litellm_params={
"litellm_call_id": litellm_call_id,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.delete_response_api_handler(
response_id=response_id,
custom_llm_provider=custom_llm_provider,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)

View file

@@ -1,7 +1,7 @@
import asyncio
import json
from datetime import datetime
-from typing import Optional
+from typing import Any, Dict, Optional

import httpx
@@ -10,6 +10,7 @@ from litellm.litellm_core_utils.asyncify import run_async_function
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
@@ -33,6 +34,8 @@ class BaseResponsesAPIStreamingIterator:
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
+litellm_metadata: Optional[Dict[str, Any]] = None,
+custom_llm_provider: Optional[str] = None,
):
self.response = response
self.model = model
@@ -42,6 +45,10 @@
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
self.start_time = datetime.now()

+# set request kwargs
+self.litellm_metadata = litellm_metadata
+self.custom_llm_provider = custom_llm_provider

def _process_chunk(self, chunk):
"""Process a single chunk of data from the stream"""
if not chunk:
@@ -70,6 +77,17 @@
logging_obj=self.logging_obj,
)
)

+# if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
+response_object = getattr(openai_responses_api_chunk, "response", None)
+if response_object:
+response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+responses_api_response=response_object,
+litellm_metadata=self.litellm_metadata,
+custom_llm_provider=self.custom_llm_provider,
+)
+setattr(openai_responses_api_chunk, "response", response)

# Store the completed response
if (
openai_responses_api_chunk
@@ -102,8 +120,17 @@ class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
+litellm_metadata: Optional[Dict[str, Any]] = None,
+custom_llm_provider: Optional[str] = None,
):
-super().__init__(response, model, responses_api_provider_config, logging_obj)
+super().__init__(
+response,
+model,
+responses_api_provider_config,
+logging_obj,
+litellm_metadata,
+custom_llm_provider,
+)
self.stream_iterator = response.aiter_lines()

def __aiter__(self):
@@ -163,8 +190,17 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
+litellm_metadata: Optional[Dict[str, Any]] = None,
+custom_llm_provider: Optional[str] = None,
):
-super().__init__(response, model, responses_api_provider_config, logging_obj)
+super().__init__(
+response,
+model,
+responses_api_provider_config,
+logging_obj,
+litellm_metadata,
+custom_llm_provider,
+)
self.stream_iterator = response.iter_lines()

def __iter__(self):
@@ -228,12 +264,16 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
+litellm_metadata: Optional[Dict[str, Any]] = None,
+custom_llm_provider: Optional[str] = None,
):
super().__init__(
response=response,
model=model,
responses_api_provider_config=responses_api_provider_config,
logging_obj=logging_obj,
+litellm_metadata=litellm_metadata,
+custom_llm_provider=custom_llm_provider,
)

# one-time transform

View file

@@ -1,5 +1,5 @@
import base64
-from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints
+from typing import Any, Dict, Optional, Union, cast, get_type_hints

import litellm
from litellm._logging import verbose_logger
@@ -9,6 +9,7 @@ from litellm.types.llms.openai import (
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
+from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage
@@ -83,30 +84,36 @@ class ResponsesAPIRequestUtils:
@staticmethod
def _update_responses_api_response_id_with_model_id(
responses_api_response: ResponsesAPIResponse,
-kwargs: Dict[str, Any],
+custom_llm_provider: Optional[str],
+litellm_metadata: Optional[Dict[str, Any]] = None,
) -> ResponsesAPIResponse:
-"""Update the responses_api_response_id with the model_id"""
-litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
+"""
+Update the responses_api_response_id with model_id and custom_llm_provider
+
+This builds a composite ID containing the custom LLM provider, model ID, and original response ID
+"""
+litellm_metadata = litellm_metadata or {}
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
model_id = model_info.get("id")
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
model_id=model_id,
+custom_llm_provider=custom_llm_provider,
response_id=responses_api_response.id,
)
responses_api_response.id = updated_id
return responses_api_response

@staticmethod
def _build_responses_api_response_id(
+custom_llm_provider: Optional[str],
model_id: Optional[str],
response_id: str,
) -> str:
"""Build the responses_api_response_id"""
-if model_id is None:
-return response_id
assembled_id: str = str(
SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
-).format(model_id, response_id)
+).format(custom_llm_provider, model_id, response_id)
base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
"utf-8"
)
@@ -115,12 +122,12 @@
@staticmethod
def _decode_responses_api_response_id(
response_id: str,
-) -> Tuple[Optional[str], str]:
+) -> DecodedResponseId:
"""
Decode the responses_api_response_id

Returns:
-Tuple of model_id, response_id (from upstream provider)
+DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id
"""
try:
# Remove prefix and decode
@@ -129,16 +136,45 @@
# Parse components using known prefixes
if ";" not in decoded_id:
-return None, response_id
+return DecodedResponseId(
+custom_llm_provider=None,
+model_id=None,
+response_id=response_id,
+)

-model_part, response_part = decoded_id.split(";", 1)
-model_id = model_part.replace("litellm:model_id:", "")
-decoded_response_id = response_part.replace("response_id:", "")
-return model_id, decoded_response_id
+parts = decoded_id.split(";")
+
+# Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
+custom_llm_provider = None
+model_id = None
+
+if (
+len(parts) >= 3
+):  # Full format with custom_llm_provider, model_id, and response_id
+custom_llm_provider_part = parts[0]
+model_id_part = parts[1]
+response_part = parts[2]
+
+custom_llm_provider = custom_llm_provider_part.replace(
+"litellm:custom_llm_provider:", ""
+)
+model_id = model_id_part.replace("model_id:", "")
+decoded_response_id = response_part.replace("response_id:", "")
+else:
+decoded_response_id = response_id
+
+return DecodedResponseId(
+custom_llm_provider=custom_llm_provider,
+model_id=model_id,
+response_id=decoded_response_id,
+)
except Exception as e:
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
-return None, response_id
+return DecodedResponseId(
+custom_llm_provider=None,
+model_id=None,
+response_id=response_id,
+)


class ResponseAPILoggingUtils:
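A small sketch of the composite ID scheme used above, with placeholder values; the real helpers format SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR and may wrap the encoded value in an additional prefix before returning it:

import base64

assembled = "litellm:custom_llm_provider:azure;model_id:model-123;response_id:resp_abc123"
encoded = base64.b64encode(assembled.encode("utf-8")).decode("utf-8")

decoded = base64.b64decode(encoded.encode("utf-8")).decode("utf-8")
parts = decoded.split(";")
custom_llm_provider = parts[0].replace("litellm:custom_llm_provider:", "")
model_id = parts[1].replace("model_id:", "")
response_id = parts[2].replace("response_id:", "")
print(custom_llm_provider, model_id, response_id)  # azure model-123 resp_abc123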

View file

@@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
if previous_response_id is None:
return healthy_deployments

-model_id, response_id = (
-ResponsesAPIRequestUtils._decode_responses_api_response_id(
-response_id=previous_response_id,
-)
+decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id(
+response_id=previous_response_id,
)
+model_id = decoded_response.get("model_id")

if model_id is None:
return healthy_deployments

View file

@@ -1,5 +1,6 @@
from typing import Literal

+from pydantic import PrivateAttr
from typing_extensions import Any, List, Optional, TypedDict

from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@@ -46,3 +47,30 @@ class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
status: str  # "completed", "in_progress", etc.
role: str  # "assistant", "user", etc.
content: List[OutputText]
class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject):
"""
Result of a delete response request
{
"id": "resp_6786a1bec27481909a17d673315b29f6",
"object": "response",
"deleted": true
}
"""
id: Optional[str]
object: Optional[str]
deleted: Optional[bool]
# Define private attributes using PrivateAttr
_hidden_params: dict = PrivateAttr(default_factory=dict)
class DecodedResponseId(TypedDict, total=False):
"""Structure representing a decoded response ID"""
custom_llm_provider: Optional[str]
model_id: Optional[str]
response_id: str

View file

@@ -2254,7 +2254,9 @@ class SpecialEnums(Enum):
LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"

-LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+LITELLM_MANAGED_RESPONSE_COMPLETE_STR = (
+"litellm:custom_llm_provider:{};model_id:{};response_id:{}"
+)

LLMResponseTypes = Union[

View file

@@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

## DYNAMIC CALLBACKS ##
-dynamic_callbacks: Optional[
-List[Union[str, Callable, CustomLogger]]
-] = kwargs.pop("callbacks", None)
+dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+kwargs.pop("callbacks", None)
+)
all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)

if len(all_callbacks) > 0:
@@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915
exception=e,
retry_policy=kwargs.get("retry_policy"),
)
-kwargs[
-"retry_policy"
-] = reset_retry_policy()  # prevent infinite loops
+kwargs["retry_policy"] = (
+reset_retry_policy()
+)  # prevent infinite loops
litellm.num_retries = (
None  # set retries to None to prevent infinite loops
)
@@ -3028,16 +3028,16 @@ def get_optional_params( # noqa: PLR0915
True  # so that main.py adds the function call to the prompt
)
if "tools" in non_default_params:
-optional_params[
-"functions_unsupported_model"
-] = non_default_params.pop("tools")
+optional_params["functions_unsupported_model"] = (
+non_default_params.pop("tools")
+)
non_default_params.pop(
"tool_choice", None
)  # causes ollama requests to hang
elif "functions" in non_default_params:
-optional_params[
-"functions_unsupported_model"
-] = non_default_params.pop("functions")
+optional_params["functions_unsupported_model"] = (
+non_default_params.pop("functions")
+)
elif (
litellm.add_function_to_prompt
):  # if user opts to add it to prompt instead
@@ -3060,10 +3060,10 @@ def get_optional_params( # noqa: PLR0915
if "response_format" in non_default_params:
if provider_config is not None:
-non_default_params[
-"response_format"
-] = provider_config.get_json_schema_from_pydantic_object(
-response_format=non_default_params["response_format"]
+non_default_params["response_format"] = (
+provider_config.get_json_schema_from_pydantic_object(
+response_format=non_default_params["response_format"]
+)
)
else:
non_default_params["response_format"] = type_to_response_format_param(
@@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int:

def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
-_choices: Union[
-List[Union[Choices, StreamingChoices]], List[StreamingChoices]
-] = response_obj.choices
+_choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
+response_obj.choices
+)

response_str = ""
for choice in _choices:
@@ -6625,8 +6625,8 @@ class ProviderConfigManager:
@staticmethod
def get_provider_responses_api_config(
-model: str,
provider: LlmProviders,
+model: Optional[str] = None,
) -> Optional[BaseResponsesAPIConfig]:
if litellm.LlmProviders.OPENAI == provider:
return litellm.OpenAIResponsesAPIConfig()

View file

@@ -203,9 +203,6 @@ class TestOpenAIResponsesAPIConfig:
result = self.config.get_complete_url(
api_base=api_base,
-model=self.model,
-api_key="test_api_key",
-optional_params={},
litellm_params={},
)
@@ -215,9 +212,6 @@
with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
result = self.config.get_complete_url(
api_base=None,
-model=self.model,
-api_key="test_api_key",
-optional_params={},
litellm_params={},
)
@@ -231,9 +225,6 @@
):
result = self.config.get_complete_url(
api_base=None,
-model=self.model,
-api_key="test_api_key",
-optional_params={},
litellm_params={},
)
@@ -247,9 +238,6 @@
):
result = self.config.get_complete_url(
api_base=None,
-model=self.model,
-api_key="test_api_key",
-optional_params={},
litellm_params={},
)
@@ -260,9 +248,6 @@
result = self.config.get_complete_url(
api_base=api_base,
-model=self.model,
-api_key="test_api_key",
-optional_params={},
litellm_params={},
)

View file

@@ -189,6 +189,90 @@ class BaseResponsesAPITest(ABC):
@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.asyncio
async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
if sync_mode:
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# delete the response
if isinstance(response, ResponsesAPIResponse):
litellm.delete_responses(
response_id=response.id,
**base_completion_call_args
)
else:
raise ValueError("response is not a ResponsesAPIResponse")
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# async delete the response
if isinstance(response, ResponsesAPIResponse):
await litellm.adelete_responses(
response_id=response.id,
**base_completion_call_args
)
else:
raise ValueError("response is not a ResponsesAPIResponse")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
#litellm._turn_on_debug()
#litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
response_id = None
if sync_mode:
response_id = None
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
stream=True,
**base_completion_call_args
)
for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if "response" in event:
response_obj = event.get("response")
if response_obj is not None:
response_id = response_obj.get("id")
print("got response_id=", response_id)
# delete the response
assert response_id is not None
litellm.delete_responses(
response_id=response_id,
**base_completion_call_args
)
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
stream=True,
**base_completion_call_args
)
async for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if "response" in event:
response_obj = event.get("response")
if response_obj is not None:
response_id = response_obj.get("id")
print("got response_id=", response_id)
# delete the response
assert response_id is not None
await litellm.adelete_responses(
response_id=response_id,
**base_completion_call_args
)

View file

@@ -29,6 +29,12 @@ class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
return {
"model": "anthropic/claude-3-5-sonnet-latest",
}
async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
pass
async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
pass
def test_multiturn_tool_calls():