add responses_api

Ishaan Jaff 2025-03-12 17:08:16 -07:00
parent 055a4fa2d5
commit 584338fb82
3 changed files with 155 additions and 51 deletions

View file

@@ -5,18 +5,18 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Tuple, Union
 
 import httpx
-from fastapi import Request
+from fastapi import HTTPException, Request, status
 from fastapi.responses import Response, StreamingResponse
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy._types import ProxyException, UserAPIKeyAuth
 from litellm.proxy.auth.auth_utils import check_response_size_is_safe
 from litellm.proxy.common_utils.callback_utils import (
     get_logging_caching_headers,
     get_remaining_tokens_and_requests_from_request_data,
 )
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.utils import ProxyLogging
 from litellm.router import Router
@@ -281,6 +281,66 @@ class ProxyBaseLLMRequestProcessing:
         return response
 
+    @staticmethod
+    async def _handle_llm_api_exception(
+        e: Exception,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
+        version: Optional[str] = None,
+    ):
+        """Raises ProxyException (OpenAI API compatible) if an exception is raised"""
+        verbose_proxy_logger.exception(
+            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
+        )
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
+        )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
+        timeout = getattr(
+            e, "timeout", None
+        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
+        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
+            "litellm_logging_obj", None
+        )
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            call_id=(
+                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
+            ),
+            version=version,
+            response_cost=0,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            request_data=data,
+            timeout=timeout,
+        )
+        headers = getattr(e, "headers", {}) or {}
+        headers.update(custom_headers)
+
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+                headers=headers,
+            )
+        error_msg = f"{str(e)}"
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            openai_code=getattr(e, "code", None),
+            code=getattr(e, "status_code", 500),
+            headers=headers,
+        )
+
     @staticmethod
     def _get_pre_call_type(
         route_type: Literal["acompletion", "aresponses"]
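For context, `ProxyException` is the proxy's OpenAI-compatible error type, so the helper above translates any upstream failure into the standard OpenAI error envelope. A rough sketch of the body a client would see on the generic (non-`HTTPException`) branch, assuming that envelope; the concrete values are illustrative only:

```python
# Illustrative only: the OpenAI-style error envelope a client would receive
# when the handler above raises ProxyException. Exact values depend on the
# upstream exception; serialization is handled by the proxy, not this dict.
example_error_body = {
    "error": {
        "message": "litellm.RateLimitError: ...",  # getattr(e, "message", str(e))
        "type": "None",                            # getattr(e, "type", "None")
        "param": "None",                           # getattr(e, "param", "None")
        "code": "429",                             # derived from the exception's status_code
    }
}
```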

View file

@@ -3509,55 +3509,11 @@ async def chat_completion(  # noqa: PLR0915
             _chat_response.usage = _usage  # type: ignore
         return _chat_response
     except Exception as e:
-        verbose_proxy_logger.exception(
-            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
-        )
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
-        )
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
-        )
-        timeout = getattr(
-            e, "timeout", None
-        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
-        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
-            "litellm_logging_obj", None
-        )
-        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
+            e=e,
+            data=data,
             user_api_key_dict=user_api_key_dict,
-            call_id=(
-                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
-            ),
-            version=version,
-            response_cost=0,
-            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-            request_data=data,
-            timeout=timeout,
-        )
-        headers = getattr(e, "headers", {}) or {}
-        headers.update(custom_headers)
-        if isinstance(e, HTTPException):
-            raise ProxyException(
-                message=getattr(e, "detail", str(e)),
-                type=getattr(e, "type", "None"),
-                param=getattr(e, "param", "None"),
-                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
-                headers=headers,
-            )
-        error_msg = f"{str(e)}"
-        raise ProxyException(
-            message=getattr(e, "message", error_msg),
-            type=getattr(e, "type", "None"),
-            param=getattr(e, "param", "None"),
-            openai_code=getattr(e, "code", None),
-            code=getattr(e, "status_code", 500),
-            headers=headers,
-        )
+            proxy_logging_obj=proxy_logging_obj,
         )
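One note on the `raise await ...` form in the replacement above: `_handle_llm_api_exception` always raises, so the outer `raise` never actually executes; it just makes it explicit that the `except` branch cannot fall through. A minimal, self-contained sketch of the pattern with hypothetical names:

```python
# Standalone sketch of the `raise await handler(...)` pattern used above.
# The awaited helper always raises, so the outer `raise` is never reached;
# it only documents that the except branch terminates.
import asyncio
from typing import NoReturn


async def _always_raises(e: Exception) -> NoReturn:  # hypothetical stand-in
    raise RuntimeError(f"wrapped: {e}") from e


async def endpoint() -> str:  # hypothetical endpoint body
    try:
        return "ok"
    except Exception as e:
        raise await _always_raises(e)


print(asyncio.run(endpoint()))  # prints "ok"; the except path is never taken here
```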

View file

@@ -0,0 +1,88 @@
+import backoff
+from fastapi import APIRouter, Depends, Request, Response
+
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import *
+from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm.proxy.proxy_server import _read_request_body, select_data_generator
+
+router = APIRouter()
+
+
+@router.post(
+    "/v1/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+@router.post(
+    "/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+@backoff.on_exception(
+    backoff.expo,
+    Exception,  # base exception to catch for the backoff
+    logger=verbose_proxy_logger,
+)
+async def responses_api(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses
+
+    ```bash
+    curl -X POST http://localhost:4000/v1/responses \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer sk-1234" \
+        -d '{
+            "model": "gpt-4o",
+            "input": "Tell me about AI"
+        }'
+    ```
+    """
+    from litellm.proxy.proxy_server import (
+        general_settings,
+        llm_router,
+        proxy_config,
+        proxy_logging_obj,
+        user_api_base,
+        user_max_tokens,
+        user_model,
+        user_request_timeout,
+        user_temperature,
+        version,
+    )
+
+    data = {}
+    try:
+        data = await _read_request_body(request=request)
+        return await ProxyBaseLLMRequestProcessing.base_process_llm_request(
+            data=data,
+            request=request,
+            fastapi_response=fastapi_response,
+            user_api_key_dict=user_api_key_dict,
+            route_type="aresponses",
+            proxy_logging_obj=proxy_logging_obj,
+            llm_router=llm_router,
+            general_settings=general_settings,
+            proxy_config=proxy_config,
+            select_data_generator=select_data_generator,
+            model=None,
+            user_model=user_model,
+            user_temperature=user_temperature,
+            user_request_timeout=user_request_timeout,
+            user_max_tokens=user_max_tokens,
+            user_api_base=user_api_base,
+            version=version,
+        )
+    except Exception as e:
+        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
+            e=e,
+            data=data,
+            user_api_key_dict=user_api_key_dict,
+            proxy_logging_obj=proxy_logging_obj,
+            version=version,
+        )
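For reference, a minimal client-side sketch that exercises the new route, mirroring the curl example in the docstring. It assumes a proxy running on localhost:4000 with a virtual key `sk-1234` and a model named `gpt-4o` configured, plus a recent official `openai` Python SDK with Responses API support:

```python
# Minimal client sketch against the new /v1/responses route (mirrors the curl
# example in the endpoint docstring). Assumes a running proxy on
# http://localhost:4000 with virtual key "sk-1234" and a "gpt-4o" deployment.
from openai import OpenAI

client = OpenAI(
    api_key="sk-1234",                 # litellm proxy virtual key
    base_url="http://localhost:4000",  # point the SDK at the proxy
)

response = client.responses.create(
    model="gpt-4o",
    input="Tell me about AI",
)
print(response.output_text)
```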