diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 36e6e8b3dc..00613e5d76 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -5,18 +5,18 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Tuple, Union
 
 import httpx
-from fastapi import Request
+from fastapi import HTTPException, Request, status
 from fastapi.responses import Response, StreamingResponse
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy._types import ProxyException, UserAPIKeyAuth
 from litellm.proxy.auth.auth_utils import check_response_size_is_safe
 from litellm.proxy.common_utils.callback_utils import (
     get_logging_caching_headers,
     get_remaining_tokens_and_requests_from_request_data,
 )
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.utils import ProxyLogging
 from litellm.router import Router
@@ -281,6 +281,66 @@ class ProxyBaseLLMRequestProcessing:
         return response
 
+    @staticmethod
+    async def _handle_llm_api_exception(
+        e: Exception,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
+        version: Optional[str] = None,
+    ):
+        """Raises a ProxyException (OpenAI API compatible error format) for the given exception."""
+        verbose_proxy_logger.exception(
+            f"litellm.proxy.proxy_server.chat_completion(): Exception occurred - {str(e)}"
+        )
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
+        )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
+
+        timeout = getattr(
+            e, "timeout", None
+        )  # returns the timeout set by the wrapper. Used for testing whether model-specific timeouts are set correctly
+        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
+            "litellm_logging_obj", None
+        )
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            call_id=(
+                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
+            ),
+            version=version,
+            response_cost=0,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            request_data=data,
+            timeout=timeout,
+        )
+        headers = getattr(e, "headers", {}) or {}
+        headers.update(custom_headers)
+
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+                headers=headers,
+            )
+        error_msg = f"{str(e)}"
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            openai_code=getattr(e, "code", None),
+            code=getattr(e, "status_code", 500),
+            headers=headers,
+        )
+
     @staticmethod
     def _get_pre_call_type(
         route_type: Literal["acompletion", "aresponses"]
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index eda43de3cf..d39c5e8182 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3509,55 +3509,11 @@ async def chat_completion(  # noqa: PLR0915
             _chat_response.usage = _usage  # type: ignore
         return _chat_response
     except Exception as e:
-        verbose_proxy_logger.exception(
-            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
-        )
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
-        )
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
-        )
-
-        timeout = getattr(
-            e, "timeout", None
-        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
-        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
-            "litellm_logging_obj", None
-        )
-        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
+            e=e,
+            data=data,
             user_api_key_dict=user_api_key_dict,
-            call_id=(
-                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
-            ),
-            version=version,
-            response_cost=0,
-            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-            request_data=data,
-            timeout=timeout,
-        )
-        headers = getattr(e, "headers", {}) or {}
-        headers.update(custom_headers)
-
-        if isinstance(e, HTTPException):
-            raise ProxyException(
-                message=getattr(e, "detail", str(e)),
-                type=getattr(e, "type", "None"),
-                param=getattr(e, "param", "None"),
-                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
-                headers=headers,
-            )
-        error_msg = f"{str(e)}"
-        raise ProxyException(
-            message=getattr(e, "message", error_msg),
-            type=getattr(e, "type", "None"),
-            param=getattr(e, "param", "None"),
-            openai_code=getattr(e, "code", None),
-            code=getattr(e, "status_code", 500),
-            headers=headers,
+            proxy_logging_obj=proxy_logging_obj,
         )
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
new file mode 100644
index 0000000000..fb84793992
--- /dev/null
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -0,0 +1,88 @@
+import backoff
+from fastapi import APIRouter, Depends, Request, Response
+
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import *
+from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm.proxy.proxy_server import _read_request_body, select_data_generator
+
+router = APIRouter()
+
+
+@router.post(
+    "/v1/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+@router.post(
+    "/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+@backoff.on_exception(
+    backoff.expo,
+    Exception,  # base exception to catch for the backoff
+    logger=verbose_proxy_logger,
+)
+async def responses_api(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses
+
+    ```bash
+    curl -X POST http://localhost:4000/v1/responses \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer sk-1234" \
+        -d '{
+            "model": "gpt-4o",
+            "input": "Tell me about AI"
+        }'
+    ```
+    """
+    from litellm.proxy.proxy_server import (
+        general_settings,
+        llm_router,
+        proxy_config,
+        proxy_logging_obj,
+        user_api_base,
+        user_max_tokens,
+        user_model,
+        user_request_timeout,
+        user_temperature,
+        version,
+    )
+
+    data = {}
+    try:
+        data = await _read_request_body(request=request)
+        return await ProxyBaseLLMRequestProcessing.base_process_llm_request(
+            data=data,
+            request=request,
+            fastapi_response=fastapi_response,
+            user_api_key_dict=user_api_key_dict,
+            route_type="aresponses",
+            proxy_logging_obj=proxy_logging_obj,
+            llm_router=llm_router,
+            general_settings=general_settings,
+            proxy_config=proxy_config,
+            select_data_generator=select_data_generator,
+            model=None,
+            user_model=user_model,
+            user_temperature=user_temperature,
+            user_request_timeout=user_request_timeout,
+            user_max_tokens=user_max_tokens,
+            user_api_base=user_api_base,
+            version=version,
+        )
+    except Exception as e:
+        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
+            e=e,
+            data=data,
+            user_api_key_dict=user_api_key_dict,
+            proxy_logging_obj=proxy_logging_obj,
+            version=version,
+        )
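
For manual testing, the curl call in the endpoint docstring can also be reproduced with the OpenAI Python SDK pointed at the proxy. This is a minimal client-side sketch, not part of the diff; it assumes a proxy running on http://localhost:4000, the placeholder virtual key sk-1234 from the docstring, and an openai package version recent enough to ship the Responses API client.

```python
# Client-side sketch (not part of this diff). Assumes the LiteLLM proxy is
# running on http://localhost:4000 with the placeholder key sk-1234, and an
# openai SDK version that includes the Responses API client.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Hits the new /v1/responses route added by response_api_endpoints/endpoints.py
response = client.responses.create(
    model="gpt-4o",
    input="Tell me about AI",
)
print(response)
```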
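Because the error mapping is now extracted into `ProxyBaseLLMRequestProcessing._handle_llm_api_exception`, it can be unit tested without going through the chat completion route. The sketch below is hypothetical and untested against this branch; it assumes pytest-asyncio is installed, that `UserAPIKeyAuth(api_key=...)` and `get_custom_headers` tolerate an empty request body, and it only asserts that a FastAPI `HTTPException` surfaces as an OpenAI-compatible `ProxyException`.

```python
# Hypothetical test sketch (not included in this diff). Requires pytest-asyncio;
# the MagicMock stands in for the ProxyLogging object so the awaited
# post_call_failure_hook is a no-op.
from unittest.mock import AsyncMock, MagicMock

import pytest
from fastapi import HTTPException

from litellm.proxy._types import ProxyException, UserAPIKeyAuth
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing


@pytest.mark.asyncio
async def test_handle_llm_api_exception_wraps_http_exception():
    proxy_logging_obj = MagicMock()
    proxy_logging_obj.post_call_failure_hook = AsyncMock()

    # The handler should re-raise the HTTPException as a ProxyException,
    # preserving the detail message for the OpenAI-compatible error body.
    with pytest.raises(ProxyException) as exc_info:
        await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
            e=HTTPException(status_code=429, detail="rate limited"),
            data={},
            user_api_key_dict=UserAPIKeyAuth(api_key="sk-1234"),
            proxy_logging_obj=proxy_logging_obj,
            version="test",
        )

    assert exc_info.value.message == "rate limited"
```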