Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
[Feat] Add Support for DELETE /v1/responses/{response_id} on OpenAI, Azure OpenAI (#10205)
* add transform_delete_response_api_request to base responses config
* add transform_delete_response_api_request
* add delete_response_api_handler
* fixes for deleting responses, response API
* add adelete_responses
* add async test_basic_openai_responses_delete_endpoint
* test_basic_openai_responses_delete_endpoint
* working delete for streaming on responses API
* fixes azure transformation
* TestAnthropicResponsesAPITest
* fix code check
* fix linting
* fixes for get_complete_url
* test_basic_openai_responses_streaming_delete_endpoint
* streaming fixes
This commit is contained in:
parent 9919db2742
commit 586938ed3c

15 changed files with 729 additions and 83 deletions
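The commit wires DELETE support end to end, from the new public `litellm.delete_responses` / `litellm.adelete_responses` helpers down to provider-specific URL transformations. A minimal usage sketch, based on the test code added later in this diff (the model name and prompt are illustrative, not part of the commit):

```python
import litellm

# Create a response, then delete it by id (sync path).
response = litellm.responses(
    model="openai/gpt-4o",  # illustrative model name
    input="Basic ping",
    max_output_tokens=20,
)
litellm.delete_responses(response_id=response.id)

# Async variant added by this commit:
# await litellm.adelete_responses(response_id=response.id)
```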
@@ -1,11 +1,14 @@
-from typing import TYPE_CHECKING, Any, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast
 
 import httpx
 
 import litellm
+from litellm._logging import verbose_logger
 from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
+from litellm.types.router import GenericLiteLLMParams
 from litellm.utils import _add_path_to_api_base
 
 if TYPE_CHECKING:
@@ -41,11 +44,7 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
     def get_complete_url(
         self,
         api_base: Optional[str],
-        api_key: Optional[str],
-        model: str,
-        optional_params: dict,
         litellm_params: dict,
-        stream: Optional[bool] = None,
     ) -> str:
         """
         Constructs a complete URL for the API request.
@@ -92,3 +91,48 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
         final_url = httpx.URL(new_url).copy_with(params=query_params)
 
         return str(final_url)
+
+    #########################################################
+    ########## DELETE RESPONSE API TRANSFORMATION ##############
+    #########################################################
+    def transform_delete_response_api_request(
+        self,
+        response_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Transform the delete response API request into a URL and data
+
+        Azure OpenAI API expects the following request:
+        - DELETE /openai/responses/{response_id}?api-version=xxx
+
+        This function handles URLs with query parameters by inserting the response_id
+        at the correct location (before any query parameters).
+        """
+        from urllib.parse import urlparse, urlunparse
+
+        # Parse the URL to separate its components
+        parsed_url = urlparse(api_base)
+
+        # Insert the response_id at the end of the path component
+        # Remove trailing slash if present to avoid double slashes
+        path = parsed_url.path.rstrip("/")
+        new_path = f"{path}/{response_id}"
+
+        # Reconstruct the URL with all original components but with the modified path
+        delete_url = urlunparse(
+            (
+                parsed_url.scheme,  # http, https
+                parsed_url.netloc,  # domain name, port
+                new_path,  # path with response_id added
+                parsed_url.params,  # parameters
+                parsed_url.query,  # query string
+                parsed_url.fragment,  # fragment
+            )
+        )
+
+        data: Dict = {}
+        verbose_logger.debug(f"delete response url={delete_url}")
+        return delete_url, data
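To see what the Azure transformation above produces, here is a small standalone sketch of the same urlparse/urlunparse logic; the resource name, path, and api-version value are made up for illustration:

```python
from urllib.parse import urlparse, urlunparse

def insert_response_id(api_base: str, response_id: str) -> str:
    # Same path-insertion approach as the Azure config above:
    # append the id to the path while preserving the query string.
    parsed = urlparse(api_base)
    new_path = f"{parsed.path.rstrip('/')}/{response_id}"
    return urlunparse(
        (parsed.scheme, parsed.netloc, new_path, parsed.params, parsed.query, parsed.fragment)
    )

print(
    insert_response_id(
        "https://my-resource.openai.azure.com/openai/responses?api-version=2024-05-01-preview",
        "resp_abc123",
    )
)
# https://my-resource.openai.azure.com/openai/responses/resp_abc123?api-version=2024-05-01-preview
```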
@@ -1,6 +1,6 @@
 import types
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
 import httpx
 
@@ -10,6 +10,7 @@ from litellm.types.llms.openai import (
     ResponsesAPIResponse,
     ResponsesAPIStreamingResponse,
 )
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 
 if TYPE_CHECKING:
@@ -73,11 +74,7 @@ class BaseResponsesAPIConfig(ABC):
     def get_complete_url(
         self,
         api_base: Optional[str],
-        api_key: Optional[str],
-        model: str,
-        optional_params: dict,
         litellm_params: dict,
-        stream: Optional[bool] = None,
     ) -> str:
         """
         OPTIONAL
@@ -122,6 +119,31 @@ class BaseResponsesAPIConfig(ABC):
         """
         pass
 
+    #########################################################
+    ########## DELETE RESPONSE API TRANSFORMATION ##############
+    #########################################################
+    @abstractmethod
+    def transform_delete_response_api_request(
+        self,
+        response_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        pass
+
+    @abstractmethod
+    def transform_delete_response_api_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> DeleteResponseResult:
+        pass
+
+    #########################################################
+    ########## END DELETE RESPONSE API TRANSFORMATION ##########
+    #########################################################
+
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
@@ -650,6 +650,49 @@ class HTTPHandler:
         except Exception as e:
             raise e
 
+    def delete(
+        self,
+        url: str,
+        data: Optional[Union[dict, str]] = None,  # type: ignore
+        json: Optional[dict] = None,
+        params: Optional[dict] = None,
+        headers: Optional[dict] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        stream: bool = False,
+    ):
+        try:
+            if timeout is not None:
+                req = self.client.build_request(
+                    "DELETE", url, data=data, json=json, params=params, headers=headers, timeout=timeout  # type: ignore
+                )
+            else:
+                req = self.client.build_request(
+                    "DELETE", url, data=data, json=json, params=params, headers=headers  # type: ignore
+                )
+            response = self.client.send(req, stream=stream)
+            response.raise_for_status()
+            return response
+        except httpx.TimeoutException:
+            raise litellm.Timeout(
+                message=f"Connection timed out after {timeout} seconds.",
+                model="default-model-name",
+                llm_provider="litellm-httpx-handler",
+            )
+        except httpx.HTTPStatusError as e:
+            if stream is True:
+                setattr(e, "message", mask_sensitive_info(e.response.read()))
+                setattr(e, "text", mask_sensitive_info(e.response.read()))
+            else:
+                error_text = mask_sensitive_info(e.response.text)
+                setattr(e, "message", error_text)
+                setattr(e, "text", error_text)
+
+            setattr(e, "status_code", e.response.status_code)
+
+            raise e
+        except Exception as e:
+            raise e
+
     def __del__(self) -> None:
         try:
             self.close()
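The new `HTTPHandler.delete` wraps httpx's explicit request-building API so it can also stream bodies. A minimal sketch of the same build_request/send pattern using httpx directly (the URL and token are placeholders):

```python
import httpx

client = httpx.Client()

# Build an explicit DELETE request, then send it.
req = client.build_request(
    "DELETE",
    "https://api.example.com/v1/responses/resp_abc123",  # placeholder URL
    headers={"Authorization": "Bearer sk-..."},          # placeholder token
)
resp = client.send(req)
resp.raise_for_status()
print(resp.status_code)
```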
@@ -36,6 +36,7 @@ from litellm.types.llms.openai import (
     ResponsesAPIResponse,
 )
 from litellm.types.rerank import OptionalRerankParams, RerankResponse
+from litellm.types.responses.main import DeleteResponseResult
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
 from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
@@ -1015,6 +1016,7 @@ class BaseLLMHTTPHandler:
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
         _is_async: bool = False,
         fake_stream: bool = False,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
     ) -> Union[
         ResponsesAPIResponse,
         BaseResponsesAPIStreamingIterator,
@@ -1041,6 +1043,7 @@
                 timeout=timeout,
                 client=client if isinstance(client, AsyncHTTPHandler) else None,
                 fake_stream=fake_stream,
+                litellm_metadata=litellm_metadata,
             )
 
         if client is None or not isinstance(client, HTTPHandler):
@@ -1064,11 +1067,7 @@
 
         api_base = responses_api_provider_config.get_complete_url(
             api_base=litellm_params.api_base,
-            api_key=litellm_params.api_key,
-            model=model,
-            optional_params=response_api_optional_request_params,
             litellm_params=dict(litellm_params),
-            stream=stream,
         )
 
         data = responses_api_provider_config.transform_responses_api_request(
@@ -1113,6 +1112,8 @@
                     model=model,
                     logging_obj=logging_obj,
                     responses_api_provider_config=responses_api_provider_config,
+                    litellm_metadata=litellm_metadata,
+                    custom_llm_provider=custom_llm_provider,
                 )
 
             return SyncResponsesAPIStreamingIterator(
@@ -1120,6 +1121,8 @@
                 model=model,
                 logging_obj=logging_obj,
                 responses_api_provider_config=responses_api_provider_config,
+                litellm_metadata=litellm_metadata,
+                custom_llm_provider=custom_llm_provider,
             )
         else:
             # For non-streaming requests
@@ -1156,6 +1159,7 @@
         timeout: Optional[Union[float, httpx.Timeout]] = None,
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
         fake_stream: bool = False,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
     ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
         """
         Async version of the responses API handler.
@@ -1183,11 +1187,7 @@
 
         api_base = responses_api_provider_config.get_complete_url(
             api_base=litellm_params.api_base,
-            api_key=litellm_params.api_key,
-            model=model,
-            optional_params=response_api_optional_request_params,
             litellm_params=dict(litellm_params),
-            stream=stream,
         )
 
         data = responses_api_provider_config.transform_responses_api_request(
@@ -1234,6 +1234,8 @@
                     model=model,
                     logging_obj=logging_obj,
                     responses_api_provider_config=responses_api_provider_config,
+                    litellm_metadata=litellm_metadata,
+                    custom_llm_provider=custom_llm_provider,
                 )
 
             # Return the streaming iterator
@@ -1242,6 +1244,8 @@
                 model=model,
                 logging_obj=logging_obj,
                 responses_api_provider_config=responses_api_provider_config,
+                litellm_metadata=litellm_metadata,
+                custom_llm_provider=custom_llm_provider,
             )
         else:
             # For non-streaming, proceed as before
@@ -1265,6 +1269,163 @@
             logging_obj=logging_obj,
         )
 
+    async def async_delete_response_api_handler(
+        self,
+        response_id: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str],
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> DeleteResponseResult:
+        """
+        Async version of the delete response API handler.
+        Uses async HTTP client to make requests.
+        """
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=extra_headers or {},
+            model="None",
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, data = responses_api_provider_config.transform_delete_response_api_request(
+            response_id=response_id,
+            api_base=api_base,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = await async_httpx_client.delete(
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
+            )
+
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_delete_response_api_response(
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
+    def delete_response_api_handler(
+        self,
+        response_id: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str],
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
+        """
+        Async version of the responses API handler.
+        Uses async HTTP client to make requests.
+        """
+        if _is_async:
+            return self.async_delete_response_api_handler(
+                response_id=response_id,
+                responses_api_provider_config=responses_api_provider_config,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client,
+            )
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=extra_headers or {},
+            model="None",
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            litellm_params=dict(litellm_params),
+        )
+
+        url, data = responses_api_provider_config.transform_delete_response_api_request(
+            response_id=response_id,
+            api_base=api_base,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        try:
+            response = sync_httpx_client.delete(
+                url=url, headers=headers, data=json.dumps(data), timeout=timeout
+            )
+
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_delete_response_api_response(
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
     def create_file(
         self,
         create_file_data: CreateFileRequest,
@@ -7,6 +7,7 @@ from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import *
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 
 from ..common_utils import OpenAIError
@@ -110,11 +111,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
     def get_complete_url(
         self,
         api_base: Optional[str],
-        api_key: Optional[str],
-        model: str,
-        optional_params: dict,
         litellm_params: dict,
-        stream: Optional[bool] = None,
     ) -> str:
         """
         Get the endpoint for OpenAI responses API
@@ -217,3 +214,39 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
                 f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
             )
             return False
+
+    #########################################################
+    ########## DELETE RESPONSE API TRANSFORMATION ##############
+    #########################################################
+    def transform_delete_response_api_request(
+        self,
+        response_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Transform the delete response API request into a URL and data
+
+        OpenAI API expects the following request
+        - DELETE /v1/responses/{response_id}
+        """
+        url = f"{api_base}/{response_id}"
+        data: Dict = {}
+        return url, data
+
+    def transform_delete_response_api_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> DeleteResponseResult:
+        """
+        Transform the delete response API response into a DeleteResponseResult
+        """
+        try:
+            raw_response_json = raw_response.json()
+        except Exception:
+            raise OpenAIError(
+                message=raw_response.text, status_code=raw_response.status_code
+            )
+        return DeleteResponseResult(**raw_response_json)
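For OpenAI the delete transformation is just string concatenation plus response parsing. A quick illustrative sketch (the api_base value is a placeholder, and the JSON mirrors the example in the DeleteResponseResult docstring later in this diff):

```python
api_base = "https://api.openai.com/v1/responses"  # placeholder base URL
response_id = "resp_6786a1bec27481909a17d673315b29f6"

# URL construction used by the OpenAI config above.
url = f"{api_base}/{response_id}"
print(url)
# https://api.openai.com/v1/responses/resp_6786a1bec27481909a17d673315b29f6

# The DELETE response body is parsed straight into DeleteResponseResult.
raw_response_json = {"id": response_id, "object": "response", "deleted": True}
```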
@@ -1,7 +1,7 @@
 import asyncio
 import contextvars
 from functools import partial
-from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
 
 import httpx
 
@@ -24,6 +24,7 @@ from litellm.types.llms.openai import (
     ToolChoice,
     ToolParam,
 )
+from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 from litellm.utils import ProviderConfigManager, client
 
@@ -121,7 +122,8 @@ async def aresponses(
         if isinstance(response, ResponsesAPIResponse):
             response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                 responses_api_response=response,
-                kwargs=kwargs,
+                litellm_metadata=kwargs.get("litellm_metadata", {}),
+                custom_llm_provider=custom_llm_provider,
             )
         return response
     except Exception as e:
@@ -253,13 +255,15 @@ def responses(
             fake_stream=responses_api_provider_config.should_fake_stream(
                 model=model, stream=stream, custom_llm_provider=custom_llm_provider
             ),
+            litellm_metadata=kwargs.get("litellm_metadata", {}),
         )
 
         # Update the responses_api_response_id with the model_id
         if isinstance(response, ResponsesAPIResponse):
             response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                 responses_api_response=response,
-                kwargs=kwargs,
+                litellm_metadata=kwargs.get("litellm_metadata", {}),
+                custom_llm_provider=custom_llm_provider,
             )
 
         return response
@@ -271,3 +275,162 @@ def responses(
             completion_kwargs=local_vars,
             extra_kwargs=kwargs,
         )
+
+
+@client
+async def adelete_responses(
+    response_id: str,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM specific params,
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+) -> DeleteResponseResult:
+    """
+    Async version of the DELETE Responses API
+
+    DELETE /v1/responses/{response_id} endpoint in the responses API
+    """
+    local_vars = locals()
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["adelete_responses"] = True
+
+        # get custom llm provider from response_id
+        decoded_response_id: DecodedResponseId = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=response_id,
+            )
+        )
+        response_id = decoded_response_id.get("response_id") or response_id
+        custom_llm_provider = (
+            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
+        )
+
+        func = partial(
+            delete_responses,
+            response_id=response_id,
+            custom_llm_provider=custom_llm_provider,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            **kwargs,
+        )
+
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=None,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
+
+
+@client
+def delete_responses(
+    response_id: str,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    # LiteLLM specific params,
+    custom_llm_provider: Optional[str] = None,
+    **kwargs,
+) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
+    """
+    Synchronous version of the DELETE Responses API
+
+    DELETE /v1/responses/{response_id} endpoint in the responses API
+    """
+    local_vars = locals()
+    try:
+        litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
+        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+        _is_async = kwargs.pop("adelete_responses", False) is True
+
+        # get llm provider logic
+        litellm_params = GenericLiteLLMParams(**kwargs)
+
+        # get custom llm provider from response_id
+        decoded_response_id: DecodedResponseId = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=response_id,
+            )
+        )
+        response_id = decoded_response_id.get("response_id") or response_id
+        custom_llm_provider = (
+            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
+        )
+
+        if custom_llm_provider is None:
+            raise ValueError("custom_llm_provider is required but passed as None")
+
+        # get provider config
+        responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+            ProviderConfigManager.get_provider_responses_api_config(
+                model=None,
+                provider=litellm.LlmProviders(custom_llm_provider),
+            )
+        )
+
+        if responses_api_provider_config is None:
+            raise ValueError(
+                f"DELETE responses is not supported for {custom_llm_provider}"
+            )
+
+        local_vars.update(kwargs)
+
+        # Pre Call logging
+        litellm_logging_obj.update_environment_variables(
+            model=None,
+            optional_params={
+                "response_id": response_id,
+            },
+            litellm_params={
+                "litellm_call_id": litellm_call_id,
+            },
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        # Call the handler with _is_async flag instead of directly calling the async handler
+        response = base_llm_http_handler.delete_response_api_handler(
+            response_id=response_id,
+            custom_llm_provider=custom_llm_provider,
+            responses_api_provider_config=responses_api_provider_config,
+            litellm_params=litellm_params,
+            logging_obj=litellm_logging_obj,
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout or request_timeout,
+            _is_async=_is_async,
+            client=kwargs.get("client"),
+        )
+
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=None,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
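`adelete_responses` follows the same dispatch pattern litellm uses for its other async entrypoints: the sync function runs in an executor with a copied contextvars context, and if the sync path hands back a coroutine (because the `adelete_responses` flag was set) it is awaited. A generic, self-contained sketch of that pattern with placeholder function names:

```python
import asyncio
import contextvars
from functools import partial

def do_work(flag: bool = False):
    # Hypothetical sync entrypoint: returns a coroutine when the async flag is set.
    async def _async_work():
        return "done-async"
    return _async_work() if flag else "done-sync"

async def ado_work():
    loop = asyncio.get_event_loop()
    func = partial(do_work, flag=True)
    # Copy the current context so context variables survive the executor hop.
    ctx = contextvars.copy_context()
    init_response = await loop.run_in_executor(None, partial(ctx.run, func))
    # The sync call may hand back a coroutine; await it if so.
    return await init_response if asyncio.iscoroutine(init_response) else init_response

print(asyncio.run(ado_work()))  # done-async
```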
@@ -1,7 +1,7 @@
 import asyncio
 import json
 from datetime import datetime
-from typing import Optional
+from typing import Any, Dict, Optional
 
 import httpx
 
@@ -10,6 +10,7 @@ from litellm.litellm_core_utils.asyncify import run_async_function
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import (
     OutputTextDeltaEvent,
     ResponseCompletedEvent,
@@ -33,6 +34,8 @@ class BaseResponsesAPIStreamingIterator:
         model: str,
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
+        custom_llm_provider: Optional[str] = None,
     ):
         self.response = response
         self.model = model
@@ -42,6 +45,10 @@
         self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
         self.start_time = datetime.now()
 
+        # set request kwargs
+        self.litellm_metadata = litellm_metadata
+        self.custom_llm_provider = custom_llm_provider
+
     def _process_chunk(self, chunk):
         """Process a single chunk of data from the stream"""
         if not chunk:
@@ -70,6 +77,17 @@
                     logging_obj=self.logging_obj,
                 )
             )
+
+            # if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
+            response_object = getattr(openai_responses_api_chunk, "response", None)
+            if response_object:
+                response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+                    responses_api_response=response_object,
+                    litellm_metadata=self.litellm_metadata,
+                    custom_llm_provider=self.custom_llm_provider,
+                )
+                setattr(openai_responses_api_chunk, "response", response)
+
             # Store the completed response
             if (
                 openai_responses_api_chunk
@@ -102,8 +120,17 @@ class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         model: str,
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
+        custom_llm_provider: Optional[str] = None,
     ):
-        super().__init__(response, model, responses_api_provider_config, logging_obj)
+        super().__init__(
+            response,
+            model,
+            responses_api_provider_config,
+            logging_obj,
+            litellm_metadata,
+            custom_llm_provider,
+        )
         self.stream_iterator = response.aiter_lines()
 
     def __aiter__(self):
@@ -163,8 +190,17 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         model: str,
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
+        custom_llm_provider: Optional[str] = None,
     ):
-        super().__init__(response, model, responses_api_provider_config, logging_obj)
+        super().__init__(
+            response,
+            model,
+            responses_api_provider_config,
+            logging_obj,
+            litellm_metadata,
+            custom_llm_provider,
+        )
         self.stream_iterator = response.iter_lines()
 
     def __iter__(self):
@@ -228,12 +264,16 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         model: str,
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
+        litellm_metadata: Optional[Dict[str, Any]] = None,
+        custom_llm_provider: Optional[str] = None,
     ):
         super().__init__(
             response=response,
            model=model,
            responses_api_provider_config=responses_api_provider_config,
            logging_obj=logging_obj,
+            litellm_metadata=litellm_metadata,
+            custom_llm_provider=custom_llm_provider,
        )
 
        # one-time transform
@@ -1,5 +1,5 @@
 import base64
-from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints
+from typing import Any, Dict, Optional, Union, cast, get_type_hints
 
 import litellm
 from litellm._logging import verbose_logger
@@ -9,6 +9,7 @@ from litellm.types.llms.openai import (
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIResponse,
 )
+from litellm.types.responses.main import DecodedResponseId
 from litellm.types.utils import SpecialEnums, Usage
 
 
@@ -83,30 +84,36 @@ class ResponsesAPIRequestUtils:
     @staticmethod
     def _update_responses_api_response_id_with_model_id(
         responses_api_response: ResponsesAPIResponse,
-        kwargs: Dict[str, Any],
+        custom_llm_provider: Optional[str],
+        litellm_metadata: Optional[Dict[str, Any]] = None,
     ) -> ResponsesAPIResponse:
-        """Update the responses_api_response_id with the model_id"""
-        litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
+        """
+        Update the responses_api_response_id with model_id and custom_llm_provider
+
+        This builds a composite ID containing the custom LLM provider, model ID, and original response ID
+        """
+        litellm_metadata = litellm_metadata or {}
         model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
         model_id = model_info.get("id")
         updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
             model_id=model_id,
+            custom_llm_provider=custom_llm_provider,
            response_id=responses_api_response.id,
         )
 
         responses_api_response.id = updated_id
         return responses_api_response
 
     @staticmethod
     def _build_responses_api_response_id(
+        custom_llm_provider: Optional[str],
         model_id: Optional[str],
         response_id: str,
     ) -> str:
         """Build the responses_api_response_id"""
-        if model_id is None:
-            return response_id
         assembled_id: str = str(
             SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
-        ).format(model_id, response_id)
+        ).format(custom_llm_provider, model_id, response_id)
         base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
             "utf-8"
         )
@@ -115,12 +122,12 @@ class ResponsesAPIRequestUtils:
     @staticmethod
     def _decode_responses_api_response_id(
         response_id: str,
-    ) -> Tuple[Optional[str], str]:
+    ) -> DecodedResponseId:
         """
         Decode the responses_api_response_id
 
         Returns:
-            Tuple of model_id, response_id (from upstream provider)
+            DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id
         """
         try:
             # Remove prefix and decode
@@ -129,16 +136,45 @@ class ResponsesAPIRequestUtils:
 
             # Parse components using known prefixes
             if ";" not in decoded_id:
-                return None, response_id
+                return DecodedResponseId(
+                    custom_llm_provider=None,
+                    model_id=None,
+                    response_id=response_id,
+                )
 
-            model_part, response_part = decoded_id.split(";", 1)
-            model_id = model_part.replace("litellm:model_id:", "")
-            decoded_response_id = response_part.replace("response_id:", "")
-
-            return model_id, decoded_response_id
+            parts = decoded_id.split(";")
+
+            # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
+            custom_llm_provider = None
+            model_id = None
+
+            if (
+                len(parts) >= 3
+            ):  # Full format with custom_llm_provider, model_id, and response_id
+                custom_llm_provider_part = parts[0]
+                model_id_part = parts[1]
+                response_part = parts[2]
+
+                custom_llm_provider = custom_llm_provider_part.replace(
+                    "litellm:custom_llm_provider:", ""
+                )
+                model_id = model_id_part.replace("model_id:", "")
+                decoded_response_id = response_part.replace("response_id:", "")
+            else:
+                decoded_response_id = response_id
+
+            return DecodedResponseId(
+                custom_llm_provider=custom_llm_provider,
+                model_id=model_id,
+                response_id=decoded_response_id,
+            )
         except Exception as e:
             verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
-            return None, response_id
+            return DecodedResponseId(
+                custom_llm_provider=None,
+                model_id=None,
+                response_id=response_id,
+            )
 
 
 class ResponseAPILoggingUtils:
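The composite ID introduced here is a formatted string that gets base64-encoded. A small round-trip sketch (the provider, model id, and response id values are illustrative):

```python
import base64

TEMPLATE = "litellm:custom_llm_provider:{};model_id:{};response_id:{}"

assembled = TEMPLATE.format("azure", "my-deployment-id", "resp_abc123")
encoded = base64.b64encode(assembled.encode("utf-8")).decode("utf-8")

# Decoding splits on ";" and strips the known prefixes, mirroring
# _decode_responses_api_response_id above.
decoded = base64.b64decode(encoded).decode("utf-8")
provider_part, model_part, response_part = decoded.split(";")
print(provider_part.replace("litellm:custom_llm_provider:", ""))  # azure
print(model_part.replace("model_id:", ""))                        # my-deployment-id
print(response_part.replace("response_id:", ""))                  # resp_abc123
```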
@@ -31,11 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         if previous_response_id is None:
             return healthy_deployments
 
-        model_id, response_id = (
-            ResponsesAPIRequestUtils._decode_responses_api_response_id(
-                response_id=previous_response_id,
-            )
+        decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id(
+            response_id=previous_response_id,
         )
+        model_id = decoded_response.get("model_id")
         if model_id is None:
             return healthy_deployments
 
@@ -1,5 +1,6 @@
 from typing import Literal
 
+from pydantic import PrivateAttr
 from typing_extensions import Any, List, Optional, TypedDict
 
 from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@@ -46,3 +47,30 @@ class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
     status: str  # "completed", "in_progress", etc.
     role: str  # "assistant", "user", etc.
     content: List[OutputText]
+
+
+class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject):
+    """
+    Result of a delete response request
+
+    {
+        "id": "resp_6786a1bec27481909a17d673315b29f6",
+        "object": "response",
+        "deleted": true
+    }
+    """
+
+    id: Optional[str]
+    object: Optional[str]
+    deleted: Optional[bool]
+
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class DecodedResponseId(TypedDict, total=False):
+    """Structure representing a decoded response ID"""
+
+    custom_llm_provider: Optional[str]
+    model_id: Optional[str]
+    response_id: str
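A quick sketch of how the new types are meant to be used; the values mirror the docstring example above, and constructing the model this way assumes BaseLiteLLMOpenAIResponseObject behaves like a pydantic model (which is how it is used elsewhere in the codebase):

```python
# Hypothetical usage of the types added above.
delete_result = DeleteResponseResult(
    id="resp_6786a1bec27481909a17d673315b29f6",
    object="response",
    deleted=True,
)

decoded: DecodedResponseId = {
    "custom_llm_provider": "openai",
    "model_id": None,
    "response_id": "resp_6786a1bec27481909a17d673315b29f6",
}
print(delete_result.deleted, decoded.get("custom_llm_provider"))
```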
@@ -2254,7 +2254,9 @@ class SpecialEnums(Enum):
     LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
     LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"
 
-    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = (
+        "litellm:custom_llm_provider:{};model_id:{};response_id:{}"
+    )
 
 
 LLMResponseTypes = Union[
@@ -516,9 +516,9 @@ def function_setup(  # noqa: PLR0915
     function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
 
     ## DYNAMIC CALLBACKS ##
-    dynamic_callbacks: Optional[
-        List[Union[str, Callable, CustomLogger]]
-    ] = kwargs.pop("callbacks", None)
+    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
+        kwargs.pop("callbacks", None)
+    )
     all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
 
     if len(all_callbacks) > 0:
@@ -1202,9 +1202,9 @@ def client(original_function):  # noqa: PLR0915
                 exception=e,
                 retry_policy=kwargs.get("retry_policy"),
             )
-            kwargs[
-                "retry_policy"
-            ] = reset_retry_policy()  # prevent infinite loops
+            kwargs["retry_policy"] = (
+                reset_retry_policy()
+            )  # prevent infinite loops
             litellm.num_retries = (
                 None  # set retries to None to prevent infinite loops
             )
@@ -3028,16 +3028,16 @@ def get_optional_params(  # noqa: PLR0915
             True  # so that main.py adds the function call to the prompt
         )
         if "tools" in non_default_params:
-            optional_params[
-                "functions_unsupported_model"
-            ] = non_default_params.pop("tools")
+            optional_params["functions_unsupported_model"] = (
+                non_default_params.pop("tools")
+            )
             non_default_params.pop(
                 "tool_choice", None
             )  # causes ollama requests to hang
         elif "functions" in non_default_params:
-            optional_params[
-                "functions_unsupported_model"
-            ] = non_default_params.pop("functions")
+            optional_params["functions_unsupported_model"] = (
+                non_default_params.pop("functions")
+            )
         elif (
             litellm.add_function_to_prompt
         ):  # if user opts to add it to prompt instead
@@ -3060,10 +3060,10 @@ def get_optional_params(  # noqa: PLR0915
 
     if "response_format" in non_default_params:
         if provider_config is not None:
-            non_default_params[
-                "response_format"
-            ] = provider_config.get_json_schema_from_pydantic_object(
-                response_format=non_default_params["response_format"]
+            non_default_params["response_format"] = (
+                provider_config.get_json_schema_from_pydantic_object(
+                    response_format=non_default_params["response_format"]
+                )
             )
         else:
             non_default_params["response_format"] = type_to_response_format_param(
@@ -4079,9 +4079,9 @@ def _count_characters(text: str) -> int:
 
 
 def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
-    _choices: Union[
-        List[Union[Choices, StreamingChoices]], List[StreamingChoices]
-    ] = response_obj.choices
+    _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
+        response_obj.choices
+    )
 
     response_str = ""
     for choice in _choices:
@@ -6625,8 +6625,8 @@ class ProviderConfigManager:
 
     @staticmethod
     def get_provider_responses_api_config(
-        model: str,
         provider: LlmProviders,
+        model: Optional[str] = None,
     ) -> Optional[BaseResponsesAPIConfig]:
         if litellm.LlmProviders.OPENAI == provider:
             return litellm.OpenAIResponsesAPIConfig()
@@ -203,9 +203,6 @@ class TestOpenAIResponsesAPIConfig:
 
         result = self.config.get_complete_url(
             api_base=api_base,
-            model=self.model,
-            api_key="test_api_key",
-            optional_params={},
             litellm_params={},
         )
 
@@ -215,9 +212,6 @@ class TestOpenAIResponsesAPIConfig:
         with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
             result = self.config.get_complete_url(
                 api_base=None,
-                model=self.model,
-                api_key="test_api_key",
-                optional_params={},
                 litellm_params={},
             )
 
@@ -231,9 +225,6 @@ class TestOpenAIResponsesAPIConfig:
         ):
             result = self.config.get_complete_url(
                 api_base=None,
-                model=self.model,
-                api_key="test_api_key",
-                optional_params={},
                 litellm_params={},
             )
 
@@ -247,9 +238,6 @@ class TestOpenAIResponsesAPIConfig:
         ):
             result = self.config.get_complete_url(
                 api_base=None,
-                model=self.model,
-                api_key="test_api_key",
-                optional_params={},
                 litellm_params={},
            )
 
@@ -260,9 +248,6 @@ class TestOpenAIResponsesAPIConfig:
 
         result = self.config.get_complete_url(
             api_base=api_base,
-            model=self.model,
-            api_key="test_api_key",
-            optional_params={},
             litellm_params={},
         )
 
@@ -189,6 +189,90 @@ class BaseResponsesAPITest(ABC):
 
 
 
+    @pytest.mark.parametrize("sync_mode", [False, True])
+    @pytest.mark.asyncio
+    async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_call_args()
+        if sync_mode:
+            response = litellm.responses(
+                input="Basic ping", max_output_tokens=20,
+                **base_completion_call_args
+            )
+
+            # delete the response
+            if isinstance(response, ResponsesAPIResponse):
+                litellm.delete_responses(
+                    response_id=response.id,
+                    **base_completion_call_args
+                )
+            else:
+                raise ValueError("response is not a ResponsesAPIResponse")
+        else:
+            response = await litellm.aresponses(
+                input="Basic ping", max_output_tokens=20,
+                **base_completion_call_args
+            )
+
+            # async delete the response
+            if isinstance(response, ResponsesAPIResponse):
+                await litellm.adelete_responses(
+                    response_id=response.id,
+                    **base_completion_call_args
+                )
+            else:
+                raise ValueError("response is not a ResponsesAPIResponse")
+
+    @pytest.mark.parametrize("sync_mode", [True, False])
+    @pytest.mark.asyncio
+    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
+        #litellm._turn_on_debug()
+        #litellm.set_verbose = True
+        base_completion_call_args = self.get_base_completion_call_args()
+        response_id = None
+        if sync_mode:
+            response_id = None
+            response = litellm.responses(
+                input="Basic ping", max_output_tokens=20,
+                stream=True,
+                **base_completion_call_args
+            )
+            for event in response:
+                print("litellm response=", json.dumps(event, indent=4, default=str))
+                if "response" in event:
+                    response_obj = event.get("response")
+                    if response_obj is not None:
+                        response_id = response_obj.get("id")
+                        print("got response_id=", response_id)
+
+            # delete the response
+            assert response_id is not None
+            litellm.delete_responses(
+                response_id=response_id,
+                **base_completion_call_args
+            )
+        else:
+            response = await litellm.aresponses(
+                input="Basic ping", max_output_tokens=20,
+                stream=True,
+                **base_completion_call_args
+            )
+            async for event in response:
+                print("litellm response=", json.dumps(event, indent=4, default=str))
+                if "response" in event:
+                    response_obj = event.get("response")
+                    if response_obj is not None:
+                        response_id = response_obj.get("id")
+                        print("got response_id=", response_id)
+
+            # delete the response
+            assert response_id is not None
+            await litellm.adelete_responses(
+                response_id=response_id,
+                **base_completion_call_args
+            )
+
+
@@ -29,6 +29,12 @@ class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
         return {
             "model": "anthropic/claude-3-5-sonnet-latest",
         }
+
+    async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
+        pass
+
+    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
+        pass
+
 
 def test_multiturn_tool_calls():