diff --git a/litellm/llms/azure/responses/transformation.py b/litellm/llms/azure/responses/transformation.py index a85ba73bec..499d21cb0e 100644 --- a/litellm/llms/azure/responses/transformation.py +++ b/litellm/llms/azure/responses/transformation.py @@ -1,11 +1,14 @@ -from typing import TYPE_CHECKING, Any, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast import httpx import litellm +from litellm._logging import verbose_logger from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import * +from litellm.types.responses.main import * +from litellm.types.router import GenericLiteLLMParams from litellm.utils import _add_path_to_api_base if TYPE_CHECKING: @@ -41,11 +44,7 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig): def get_complete_url( self, api_base: Optional[str], - api_key: Optional[str], - model: str, - optional_params: dict, litellm_params: dict, - stream: Optional[bool] = None, ) -> str: """ Constructs a complete URL for the API request. @@ -92,3 +91,48 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig): final_url = httpx.URL(new_url).copy_with(params=query_params) return str(final_url) + + ######################################################### + ########## DELETE RESPONSE API TRANSFORMATION ############## + ######################################################### + def transform_delete_response_api_request( + self, + response_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + """ + Transform the delete response API request into a URL and data + + Azure OpenAI API expects the following request: + - DELETE /openai/responses/{response_id}?api-version=xxx + + This function handles URLs with query parameters by inserting the response_id + at the correct location (before any query parameters). 
+ """ + from urllib.parse import urlparse, urlunparse + + # Parse the URL to separate its components + parsed_url = urlparse(api_base) + + # Insert the response_id at the end of the path component + # Remove trailing slash if present to avoid double slashes + path = parsed_url.path.rstrip("/") + new_path = f"{path}/{response_id}" + + # Reconstruct the URL with all original components but with the modified path + delete_url = urlunparse( + ( + parsed_url.scheme, # http, https + parsed_url.netloc, # domain name, port + new_path, # path with response_id added + parsed_url.params, # parameters + parsed_url.query, # query string + parsed_url.fragment, # fragment + ) + ) + + data: Dict = {} + verbose_logger.debug(f"delete response url={delete_url}") + return delete_url, data diff --git a/litellm/llms/base_llm/responses/transformation.py b/litellm/llms/base_llm/responses/transformation.py index 649b91226f..15ce8cba3f 100644 --- a/litellm/llms/base_llm/responses/transformation.py +++ b/litellm/llms/base_llm/responses/transformation.py @@ -1,6 +1,6 @@ import types from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union import httpx @@ -10,6 +10,7 @@ from litellm.types.llms.openai import ( ResponsesAPIResponse, ResponsesAPIStreamingResponse, ) +from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams if TYPE_CHECKING: @@ -73,11 +74,7 @@ class BaseResponsesAPIConfig(ABC): def get_complete_url( self, api_base: Optional[str], - api_key: Optional[str], - model: str, - optional_params: dict, litellm_params: dict, - stream: Optional[bool] = None, ) -> str: """ OPTIONAL @@ -122,6 +119,31 @@ class BaseResponsesAPIConfig(ABC): """ pass + ######################################################### + ########## DELETE RESPONSE API TRANSFORMATION ############## + ######################################################### + @abstractmethod + def transform_delete_response_api_request( + self, + response_id: str, + api_base: str, + litellm_params: GenericLiteLLMParams, + headers: dict, + ) -> Tuple[str, Dict]: + pass + + @abstractmethod + def transform_delete_response_api_response( + self, + raw_response: httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> DeleteResponseResult: + pass + + ######################################################### + ########## END DELETE RESPONSE API TRANSFORMATION ########## + ######################################################### + def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index 627dd8c9f9..f99e04ab9d 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -650,6 +650,49 @@ class HTTPHandler: except Exception as e: raise e + def delete( + self, + url: str, + data: Optional[Union[dict, str]] = None, # type: ignore + json: Optional[dict] = None, + params: Optional[dict] = None, + headers: Optional[dict] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + stream: bool = False, + ): + try: + if timeout is not None: + req = self.client.build_request( + "DELETE", url, data=data, json=json, params=params, headers=headers, timeout=timeout # type: ignore + ) + else: + req = self.client.build_request( + "DELETE", url, data=data, json=json, params=params, headers=headers # type: ignore + ) + 
response = self.client.send(req, stream=stream) + response.raise_for_status() + return response + except httpx.TimeoutException: + raise litellm.Timeout( + message=f"Connection timed out after {timeout} seconds.", + model="default-model-name", + llm_provider="litellm-httpx-handler", + ) + except httpx.HTTPStatusError as e: + if stream is True: + setattr(e, "message", mask_sensitive_info(e.response.read())) + setattr(e, "text", mask_sensitive_info(e.response.read())) + else: + error_text = mask_sensitive_info(e.response.text) + setattr(e, "message", error_text) + setattr(e, "text", error_text) + + setattr(e, "status_code", e.response.status_code) + + raise e + except Exception as e: + raise e + def __del__(self) -> None: try: self.close() diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index c7b18215d0..1958ef0b60 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -36,6 +36,7 @@ from litellm.types.llms.openai import ( ResponsesAPIResponse, ) from litellm.types.rerank import OptionalRerankParams, RerankResponse +from litellm.types.responses.main import DeleteResponseResult from litellm.types.router import GenericLiteLLMParams from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager @@ -1015,6 +1016,7 @@ class BaseLLMHTTPHandler: client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, fake_stream: bool = False, + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, @@ -1041,6 +1043,7 @@ class BaseLLMHTTPHandler: timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, fake_stream=fake_stream, + litellm_metadata=litellm_metadata, ) if client is None or not isinstance(client, HTTPHandler): @@ -1064,11 +1067,7 @@ class BaseLLMHTTPHandler: api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, - api_key=litellm_params.api_key, - model=model, - optional_params=response_api_optional_request_params, litellm_params=dict(litellm_params), - stream=stream, ) data = responses_api_provider_config.transform_responses_api_request( @@ -1113,6 +1112,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) return SyncResponsesAPIStreamingIterator( @@ -1120,6 +1121,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) else: # For non-streaming requests @@ -1156,6 +1159,7 @@ class BaseLLMHTTPHandler: timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, fake_stream: bool = False, + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. 
@@ -1183,11 +1187,7 @@ class BaseLLMHTTPHandler: api_base = responses_api_provider_config.get_complete_url( api_base=litellm_params.api_base, - api_key=litellm_params.api_key, - model=model, - optional_params=response_api_optional_request_params, litellm_params=dict(litellm_params), - stream=stream, ) data = responses_api_provider_config.transform_responses_api_request( @@ -1234,6 +1234,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) # Return the streaming iterator @@ -1242,6 +1244,8 @@ class BaseLLMHTTPHandler: model=model, logging_obj=logging_obj, responses_api_provider_config=responses_api_provider_config, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) else: # For non-streaming, proceed as before @@ -1265,6 +1269,163 @@ class BaseLLMHTTPHandler: logging_obj=logging_obj, ) + async def async_delete_response_api_handler( + self, + response_id: str, + responses_api_provider_config: BaseResponsesAPIConfig, + litellm_params: GenericLiteLLMParams, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str], + extra_headers: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + _is_async: bool = False, + ) -> DeleteResponseResult: + """ + Async version of the delete response API handler. + Uses async HTTP client to make requests. + """ + if client is None or not isinstance(client, AsyncHTTPHandler): + async_httpx_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders(custom_llm_provider), + params={"ssl_verify": litellm_params.get("ssl_verify", None)}, + ) + else: + async_httpx_client = client + + headers = responses_api_provider_config.validate_environment( + api_key=litellm_params.api_key, + headers=extra_headers or {}, + model="None", + ) + + if extra_headers: + headers.update(extra_headers) + + api_base = responses_api_provider_config.get_complete_url( + api_base=litellm_params.api_base, + litellm_params=dict(litellm_params), + ) + + url, data = responses_api_provider_config.transform_delete_response_api_request( + response_id=response_id, + api_base=api_base, + litellm_params=litellm_params, + headers=headers, + ) + + ## LOGGING + logging_obj.pre_call( + input=input, + api_key="", + additional_args={ + "complete_input_dict": data, + "api_base": api_base, + "headers": headers, + }, + ) + + try: + response = await async_httpx_client.delete( + url=url, headers=headers, data=json.dumps(data), timeout=timeout + ) + + except Exception as e: + raise self._handle_error( + e=e, + provider_config=responses_api_provider_config, + ) + + return responses_api_provider_config.transform_delete_response_api_response( + raw_response=response, + logging_obj=logging_obj, + ) + + def delete_response_api_handler( + self, + response_id: str, + responses_api_provider_config: BaseResponsesAPIConfig, + litellm_params: GenericLiteLLMParams, + logging_obj: LiteLLMLoggingObj, + custom_llm_provider: Optional[str], + extra_headers: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + _is_async: bool = False, + ) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]: + """ + Async 
or sync handler for the DELETE Responses API.
+        Routes to the async implementation when _is_async is True; otherwise uses a sync HTTP client.
+        """
+        if _is_async:
+            return self.async_delete_response_api_handler(
+                response_id=response_id,
+                responses_api_provider_config=responses_api_provider_config,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client,
+            )
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=extra_headers or {},
+            model="None",
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, data = responses_api_provider_config.transform_delete_response_api_request(
+            response_id=response_id,
+            api_base=api_base,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.delete(
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
+            )
+
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_delete_response_api_response(
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
     def create_file(
         self,
         create_file_data: CreateFileRequest,
diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
index 047572657c..d4a443aedb 100644
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@@ -7,6 +7,7 @@ from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 
 from ..common_utils import OpenAIError
@@ -110,11 +111,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
     def get_complete_url(
         self,
         api_base: Optional[str],
-        api_key: Optional[str],
-        model: str,
-        optional_params: dict,
         litellm_params: dict,
-        stream: Optional[bool] = None,
     ) -> str:
         """
         Get the endpoint for OpenAI responses API
@@ -217,3 +214,39 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
                 f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
             )
             return False
+
+    #########################################################
+    ########## DELETE RESPONSE API TRANSFORMATION ##############
+    #########################################################
+    def transform_delete_response_api_request(
+        self,
+        response_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Transform the delete response API request into a URL and data
+
+        OpenAI API expects the following request
+        - DELETE /v1/responses/{response_id}
+        """
+        url = f"{api_base}/{response_id}"
+        data: Dict = {}
+        return url, data
+
+    def transform_delete_response_api_response(
+        self,
+        raw_response:
httpx.Response, + logging_obj: LiteLLMLoggingObj, + ) -> DeleteResponseResult: + """ + Transform the delete response API response into a DeleteResponseResult + """ + try: + raw_response_json = raw_response.json() + except Exception: + raise OpenAIError( + message=raw_response.text, status_code=raw_response.status_code + ) + return DeleteResponseResult(**raw_response_json) diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 2d7426205e..004a19a0ae 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -1,7 +1,7 @@ import asyncio import contextvars from functools import partial -from typing import Any, Dict, Iterable, List, Literal, Optional, Union +from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union import httpx @@ -24,6 +24,7 @@ from litellm.types.llms.openai import ( ToolChoice, ToolParam, ) +from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams from litellm.utils import ProviderConfigManager, client @@ -121,7 +122,8 @@ async def aresponses( if isinstance(response, ResponsesAPIResponse): response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, - kwargs=kwargs, + litellm_metadata=kwargs.get("litellm_metadata", {}), + custom_llm_provider=custom_llm_provider, ) return response except Exception as e: @@ -253,13 +255,15 @@ def responses( fake_stream=responses_api_provider_config.should_fake_stream( model=model, stream=stream, custom_llm_provider=custom_llm_provider ), + litellm_metadata=kwargs.get("litellm_metadata", {}), ) # Update the responses_api_response_id with the model_id if isinstance(response, ResponsesAPIResponse): response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( responses_api_response=response, - kwargs=kwargs, + litellm_metadata=kwargs.get("litellm_metadata", {}), + custom_llm_provider=custom_llm_provider, ) return response @@ -271,3 +275,162 @@ def responses( completion_kwargs=local_vars, extra_kwargs=kwargs, ) + + +@client +async def adelete_responses( + response_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Optional[Dict[str, Any]] = None, + extra_query: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + # LiteLLM specific params, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> DeleteResponseResult: + """ + Async version of the DELETE Responses API + + DELETE /v1/responses/{response_id} endpoint in the responses API + + """ + local_vars = locals() + try: + loop = asyncio.get_event_loop() + kwargs["adelete_responses"] = True + + # get custom llm provider from response_id + decoded_response_id: DecodedResponseId = ( + ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=response_id, + ) + ) + response_id = decoded_response_id.get("response_id") or response_id + custom_llm_provider = ( + decoded_response_id.get("custom_llm_provider") or custom_llm_provider + ) + + func = partial( + delete_responses, + response_id=response_id, + custom_llm_provider=custom_llm_provider, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **kwargs, + ) + + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) + init_response = await loop.run_in_executor(None, func_with_context) + + if asyncio.iscoroutine(init_response): + response = await init_response + else: + response = init_response + return response + except Exception as e: + raise litellm.exception_type( + model=None, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) + + +@client +def delete_responses( + response_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Optional[Dict[str, Any]] = None, + extra_query: Optional[Dict[str, Any]] = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Optional[Union[float, httpx.Timeout]] = None, + # LiteLLM specific params, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]: + """ + Synchronous version of the DELETE Responses API + + DELETE /v1/responses/{response_id} endpoint in the responses API + + """ + local_vars = locals() + try: + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore + litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) + _is_async = kwargs.pop("adelete_responses", False) is True + + # get llm provider logic + litellm_params = GenericLiteLLMParams(**kwargs) + + # get custom llm provider from response_id + decoded_response_id: DecodedResponseId = ( + ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=response_id, + ) + ) + response_id = decoded_response_id.get("response_id") or response_id + custom_llm_provider = ( + decoded_response_id.get("custom_llm_provider") or custom_llm_provider + ) + + if custom_llm_provider is None: + raise ValueError("custom_llm_provider is required but passed as None") + + # get provider config + responses_api_provider_config: Optional[BaseResponsesAPIConfig] = ( + ProviderConfigManager.get_provider_responses_api_config( + model=None, + provider=litellm.LlmProviders(custom_llm_provider), + ) + ) + + if responses_api_provider_config is None: + raise ValueError( + f"DELETE responses is not supported for {custom_llm_provider}" + ) + + local_vars.update(kwargs) + + # Pre Call logging + litellm_logging_obj.update_environment_variables( + model=None, + optional_params={ + "response_id": response_id, + }, + litellm_params={ + "litellm_call_id": litellm_call_id, + }, + custom_llm_provider=custom_llm_provider, + ) + + # Call the handler with _is_async flag instead of directly calling the async handler + response = base_llm_http_handler.delete_response_api_handler( + response_id=response_id, + custom_llm_provider=custom_llm_provider, + responses_api_provider_config=responses_api_provider_config, + litellm_params=litellm_params, + logging_obj=litellm_logging_obj, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout or request_timeout, + _is_async=_is_async, + client=kwargs.get("client"), + ) + + return response + except Exception as e: + raise litellm.exception_type( + model=None, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=local_vars, + extra_kwargs=kwargs, + ) diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py index e050c47080..3e12761ba0 100644 --- a/litellm/responses/streaming_iterator.py +++ b/litellm/responses/streaming_iterator.py @@ -1,7 +1,7 @@ import asyncio import json from datetime import datetime -from typing import Optional +from typing import Any, Dict, Optional import httpx @@ -10,6 +10,7 @@ from litellm.litellm_core_utils.asyncify import run_async_function from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig +from litellm.responses.utils import ResponsesAPIRequestUtils from litellm.types.llms.openai import ( OutputTextDeltaEvent, ResponseCompletedEvent, @@ -33,6 +34,8 @@ class 
BaseResponsesAPIStreamingIterator: model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): self.response = response self.model = model @@ -42,6 +45,10 @@ class BaseResponsesAPIStreamingIterator: self.completed_response: Optional[ResponsesAPIStreamingResponse] = None self.start_time = datetime.now() + # set request kwargs + self.litellm_metadata = litellm_metadata + self.custom_llm_provider = custom_llm_provider + def _process_chunk(self, chunk): """Process a single chunk of data from the stream""" if not chunk: @@ -70,6 +77,17 @@ class BaseResponsesAPIStreamingIterator: logging_obj=self.logging_obj, ) ) + + # if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider + response_object = getattr(openai_responses_api_chunk, "response", None) + if response_object: + response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id( + responses_api_response=response_object, + litellm_metadata=self.litellm_metadata, + custom_llm_provider=self.custom_llm_provider, + ) + setattr(openai_responses_api_chunk, "response", response) + # Store the completed response if ( openai_responses_api_chunk @@ -102,8 +120,17 @@ class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): - super().__init__(response, model, responses_api_provider_config, logging_obj) + super().__init__( + response, + model, + responses_api_provider_config, + logging_obj, + litellm_metadata, + custom_llm_provider, + ) self.stream_iterator = response.aiter_lines() def __aiter__(self): @@ -163,8 +190,17 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): - super().__init__(response, model, responses_api_provider_config, logging_obj) + super().__init__( + response, + model, + responses_api_provider_config, + logging_obj, + litellm_metadata, + custom_llm_provider, + ) self.stream_iterator = response.iter_lines() def __iter__(self): @@ -228,12 +264,16 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): model: str, responses_api_provider_config: BaseResponsesAPIConfig, logging_obj: LiteLLMLoggingObj, + litellm_metadata: Optional[Dict[str, Any]] = None, + custom_llm_provider: Optional[str] = None, ): super().__init__( response=response, model=model, responses_api_provider_config=responses_api_provider_config, logging_obj=logging_obj, + litellm_metadata=litellm_metadata, + custom_llm_provider=custom_llm_provider, ) # one-time transform diff --git a/litellm/responses/utils.py b/litellm/responses/utils.py index 5e95cbd93a..9fa455de71 100644 --- a/litellm/responses/utils.py +++ b/litellm/responses/utils.py @@ -1,5 +1,5 @@ import base64 -from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints +from typing import Any, Dict, Optional, Union, cast, get_type_hints import litellm from litellm._logging import verbose_logger @@ -9,6 +9,7 @@ from litellm.types.llms.openai import ( ResponsesAPIOptionalRequestParams, ResponsesAPIResponse, ) +from litellm.types.responses.main import 
DecodedResponseId from litellm.types.utils import SpecialEnums, Usage @@ -83,30 +84,36 @@ class ResponsesAPIRequestUtils: @staticmethod def _update_responses_api_response_id_with_model_id( responses_api_response: ResponsesAPIResponse, - kwargs: Dict[str, Any], + custom_llm_provider: Optional[str], + litellm_metadata: Optional[Dict[str, Any]] = None, ) -> ResponsesAPIResponse: - """Update the responses_api_response_id with the model_id""" - litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {} + """ + Update the responses_api_response_id with model_id and custom_llm_provider + + This builds a composite ID containing the custom LLM provider, model ID, and original response ID + """ + litellm_metadata = litellm_metadata or {} model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id") updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id( model_id=model_id, + custom_llm_provider=custom_llm_provider, response_id=responses_api_response.id, ) + responses_api_response.id = updated_id return responses_api_response @staticmethod def _build_responses_api_response_id( + custom_llm_provider: Optional[str], model_id: Optional[str], response_id: str, ) -> str: """Build the responses_api_response_id""" - if model_id is None: - return response_id assembled_id: str = str( SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value - ).format(model_id, response_id) + ).format(custom_llm_provider, model_id, response_id) base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode( "utf-8" ) @@ -115,12 +122,12 @@ class ResponsesAPIRequestUtils: @staticmethod def _decode_responses_api_response_id( response_id: str, - ) -> Tuple[Optional[str], str]: + ) -> DecodedResponseId: """ Decode the responses_api_response_id Returns: - Tuple of model_id, response_id (from upstream provider) + DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id """ try: # Remove prefix and decode @@ -129,16 +136,45 @@ class ResponsesAPIRequestUtils: # Parse components using known prefixes if ";" not in decoded_id: - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) - model_part, response_part = decoded_id.split(";", 1) - model_id = model_part.replace("litellm:model_id:", "") - decoded_response_id = response_part.replace("response_id:", "") + parts = decoded_id.split(";") - return model_id, decoded_response_id + # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{} + custom_llm_provider = None + model_id = None + + if ( + len(parts) >= 3 + ): # Full format with custom_llm_provider, model_id, and response_id + custom_llm_provider_part = parts[0] + model_id_part = parts[1] + response_part = parts[2] + + custom_llm_provider = custom_llm_provider_part.replace( + "litellm:custom_llm_provider:", "" + ) + model_id = model_id_part.replace("model_id:", "") + decoded_response_id = response_part.replace("response_id:", "") + else: + decoded_response_id = response_id + + return DecodedResponseId( + custom_llm_provider=custom_llm_provider, + model_id=model_id, + response_id=decoded_response_id, + ) except Exception as e: verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}") - return None, response_id + return DecodedResponseId( + custom_llm_provider=None, + model_id=None, + response_id=response_id, + ) class ResponseAPILoggingUtils: diff --git 
a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py index 445460c237..b030fc28c8 100644 --- a/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py +++ b/litellm/router_utils/pre_call_checks/responses_api_deployment_check.py @@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger): if previous_response_id is None: return healthy_deployments - model_id, response_id = ( - ResponsesAPIRequestUtils._decode_responses_api_response_id( - response_id=previous_response_id, - ) + decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id( + response_id=previous_response_id, ) + model_id = decoded_response.get("model_id") if model_id is None: return healthy_deployments diff --git a/litellm/types/responses/main.py b/litellm/types/responses/main.py index 63a548bbfd..b85df206bc 100644 --- a/litellm/types/responses/main.py +++ b/litellm/types/responses/main.py @@ -1,5 +1,6 @@ from typing import Literal +from pydantic import PrivateAttr from typing_extensions import Any, List, Optional, TypedDict from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject @@ -46,3 +47,30 @@ class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject): status: str # "completed", "in_progress", etc. role: str # "assistant", "user", etc. content: List[OutputText] + + +class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject): + """ + Result of a delete response request + + { + "id": "resp_6786a1bec27481909a17d673315b29f6", + "object": "response", + "deleted": true + } + """ + + id: Optional[str] + object: Optional[str] + deleted: Optional[bool] + + # Define private attributes using PrivateAttr + _hidden_params: dict = PrivateAttr(default_factory=dict) + + +class DecodedResponseId(TypedDict, total=False): + """Structure representing a decoded response ID""" + + custom_llm_provider: Optional[str] + model_id: Optional[str] + response_id: str diff --git a/litellm/types/utils.py b/litellm/types/utils.py index e9859513b9..532162e60f 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2254,7 +2254,9 @@ class SpecialEnums(Enum): LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy" LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}" - LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}" + LITELLM_MANAGED_RESPONSE_COMPLETE_STR = ( + "litellm:custom_llm_provider:{};model_id:{};response_id:{}" + ) LLMResponseTypes = Union[ diff --git a/litellm/utils.py b/litellm/utils.py index 38e604943a..0150c4f43f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, CustomLogger]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3028,16 +3028,16 @@ def get_optional_params( # noqa: 
PLR0915 True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -3060,10 +3060,10 @@ def get_optional_params( # noqa: PLR0915 if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int: def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str: - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) response_str = "" for choice in _choices: @@ -6625,8 +6625,8 @@ class ProviderConfigManager: @staticmethod def get_provider_responses_api_config( - model: str, provider: LlmProviders, + model: Optional[str] = None, ) -> Optional[BaseResponsesAPIConfig]: if litellm.LlmProviders.OPENAI == provider: return litellm.OpenAIResponsesAPIConfig() diff --git a/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py index 202d0aea23..3b9ae72da7 100644 --- a/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py +++ b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py @@ -203,9 +203,6 @@ class TestOpenAIResponsesAPIConfig: result = self.config.get_complete_url( api_base=api_base, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -215,9 +212,6 @@ class TestOpenAIResponsesAPIConfig: with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -231,9 +225,6 @@ class TestOpenAIResponsesAPIConfig: ): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -247,9 +238,6 @@ class TestOpenAIResponsesAPIConfig: ): result = self.config.get_complete_url( api_base=None, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) @@ -260,9 +248,6 @@ class TestOpenAIResponsesAPIConfig: result = self.config.get_complete_url( api_base=api_base, - model=self.model, - api_key="test_api_key", - optional_params={}, litellm_params={}, ) diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py index fd39c13604..905b9b3219 100644 --- 
a/tests/llm_responses_api_testing/base_responses_api.py +++ b/tests/llm_responses_api_testing/base_responses_api.py @@ -189,6 +189,90 @@ class BaseResponsesAPITest(ABC): + @pytest.mark.parametrize("sync_mode", [False, True]) + @pytest.mark.asyncio + async def test_basic_openai_responses_delete_endpoint(self, sync_mode): + litellm._turn_on_debug() + litellm.set_verbose = True + base_completion_call_args = self.get_base_completion_call_args() + if sync_mode: + response = litellm.responses( + input="Basic ping", max_output_tokens=20, + **base_completion_call_args + ) + + # delete the response + if isinstance(response, ResponsesAPIResponse): + litellm.delete_responses( + response_id=response.id, + **base_completion_call_args + ) + else: + raise ValueError("response is not a ResponsesAPIResponse") + else: + response = await litellm.aresponses( + input="Basic ping", max_output_tokens=20, + **base_completion_call_args + ) + + # async delete the response + if isinstance(response, ResponsesAPIResponse): + await litellm.adelete_responses( + response_id=response.id, + **base_completion_call_args + ) + else: + raise ValueError("response is not a ResponsesAPIResponse") + + + @pytest.mark.parametrize("sync_mode", [True, False]) + @pytest.mark.asyncio + async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode): + #litellm._turn_on_debug() + #litellm.set_verbose = True + base_completion_call_args = self.get_base_completion_call_args() + response_id = None + if sync_mode: + response_id = None + response = litellm.responses( + input="Basic ping", max_output_tokens=20, + stream=True, + **base_completion_call_args + ) + for event in response: + print("litellm response=", json.dumps(event, indent=4, default=str)) + if "response" in event: + response_obj = event.get("response") + if response_obj is not None: + response_id = response_obj.get("id") + print("got response_id=", response_id) + + # delete the response + assert response_id is not None + litellm.delete_responses( + response_id=response_id, + **base_completion_call_args + ) + else: + response = await litellm.aresponses( + input="Basic ping", max_output_tokens=20, + stream=True, + **base_completion_call_args + ) + async for event in response: + print("litellm response=", json.dumps(event, indent=4, default=str)) + if "response" in event: + response_obj = event.get("response") + if response_obj is not None: + response_id = response_obj.get("id") + print("got response_id=", response_id) + + # delete the response + assert response_id is not None + await litellm.adelete_responses( + response_id=response_id, + **base_completion_call_args + ) diff --git a/tests/llm_responses_api_testing/test_anthropic_responses_api.py b/tests/llm_responses_api_testing/test_anthropic_responses_api.py index 0fcb771f73..b02c9b8d11 100644 --- a/tests/llm_responses_api_testing/test_anthropic_responses_api.py +++ b/tests/llm_responses_api_testing/test_anthropic_responses_api.py @@ -29,6 +29,12 @@ class TestAnthropicResponsesAPITest(BaseResponsesAPITest): return { "model": "anthropic/claude-3-5-sonnet-latest", } + + async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False): + pass + + async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False): + pass def test_multiturn_tool_calls():
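
Usage sketch for the delete endpoints added above, mirroring the new tests in base_responses_api.py. The model alias and the assumption that an OpenAI key is configured are illustrative only; this is a sketch, not part of the diff.

import asyncio

import litellm
from litellm.types.llms.openai import ResponsesAPIResponse


def sync_roundtrip() -> None:
    # Create a response, then delete it by its (composite) litellm response id.
    response = litellm.responses(
        model="openai/gpt-4o-mini",  # assumed model alias; any Responses-API-capable model works
        input="Basic ping",
        max_output_tokens=20,
    )
    assert isinstance(response, ResponsesAPIResponse)
    result = litellm.delete_responses(response_id=response.id)
    print(result.deleted)  # DeleteResponseResult.deleted -> True on success


async def async_roundtrip() -> None:
    response = await litellm.aresponses(
        model="openai/gpt-4o-mini",
        input="Basic ping",
        max_output_tokens=20,
    )
    assert isinstance(response, ResponsesAPIResponse)
    result = await litellm.adelete_responses(response_id=response.id)
    print(result.deleted)


if __name__ == "__main__":
    sync_roundtrip()
    asyncio.run(async_roundtrip())

Because response.id is the composite litellm ID, no custom_llm_provider argument is needed; delete_responses decodes the provider and the upstream response_id from the ID itself before issuing the DELETE.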
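
A minimal sketch of the composite response-ID format that the delete path relies on. The provider, model id, and response id values below are made up, and only behavior visible in this diff is exercised.

from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.utils import SpecialEnums

# The new format string carries provider + router model id + upstream response id
# before the whole string is base64-encoded into the id returned to callers.
assembled = SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value.format(
    "openai",                                 # custom_llm_provider
    "deployment-1234",                        # hypothetical model_info["id"] from the router
    "resp_6786a1bec27481909a17d673315b29f6",  # upstream response id
)
print(assembled)
# litellm:custom_llm_provider:openai;model_id:deployment-1234;response_id:resp_6786a1bec27481909a17d673315b29f6

# Decoding an id that carries no litellm metadata degrades gracefully to the raw id.
decoded = ResponsesAPIRequestUtils._decode_responses_api_response_id(
    "resp_6786a1bec27481909a17d673315b29f6"
)
assert decoded.get("custom_llm_provider") is None
assert decoded.get("response_id") == "resp_6786a1bec27481909a17d673315b29f6"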