Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Commit 584338fb82 (parent 055a4fa2d5): add responses_api

3 changed files with 155 additions and 51 deletions.
litellm/proxy/common_request_processing.py

@@ -5,18 +5,18 @@ from datetime import datetime
 from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Tuple, Union

 import httpx
-from fastapi import Request
+from fastapi import HTTPException, Request, status
 from fastapi.responses import Response, StreamingResponse

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy._types import ProxyException, UserAPIKeyAuth
 from litellm.proxy.auth.auth_utils import check_response_size_is_safe
 from litellm.proxy.common_utils.callback_utils import (
     get_logging_caching_headers,
     get_remaining_tokens_and_requests_from_request_data,
 )
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.utils import ProxyLogging
 from litellm.router import Router
@@ -281,6 +281,66 @@ class ProxyBaseLLMRequestProcessing:
         return response

+    @staticmethod
+    async def _handle_llm_api_exception(
+        e: Exception,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
+        version: Optional[str] = None,
+    ):
+        """Raises ProxyException (OpenAI API compatible) if an exception is raised"""
+        verbose_proxy_logger.exception(
+            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
+        )
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
+        )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
+
+        timeout = getattr(
+            e, "timeout", None
+        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
+        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
+            "litellm_logging_obj", None
+        )
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            call_id=(
+                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
+            ),
+            version=version,
+            response_cost=0,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            request_data=data,
+            timeout=timeout,
+        )
+        headers = getattr(e, "headers", {}) or {}
+        headers.update(custom_headers)
+
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+                headers=headers,
+            )
+        error_msg = f"{str(e)}"
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            openai_code=getattr(e, "code", None),
+            code=getattr(e, "status_code", 500),
+            headers=headers,
+        )
+
     @staticmethod
     def _get_pre_call_type(
         route_type: Literal["acompletion", "aresponses"]
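The new `_handle_llm_api_exception` helper centralizes the exception-to-`ProxyException` mapping so individual routes no longer duplicate it. Below is a minimal, hypothetical pytest sketch of the expected behavior; the test name, the `AsyncMock` stand-in for `ProxyLogging`, and the `UserAPIKeyAuth(api_key=...)` construction are assumptions for illustration and are not part of this commit. It also assumes `pytest-asyncio` is installed.

```python
# Hypothetical sketch (not in this commit): exercise the HTTPException branch of the
# new helper and check that the proxy's failure hook is awaited and that an
# OpenAI-compatible ProxyException is raised in its place.
from unittest.mock import AsyncMock, MagicMock

import pytest
from fastapi import HTTPException, status

from litellm.proxy._types import ProxyException, UserAPIKeyAuth
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing


@pytest.mark.asyncio
async def test_http_exception_is_mapped_to_proxy_exception():
    proxy_logging_obj = MagicMock()
    proxy_logging_obj.post_call_failure_hook = AsyncMock()  # awaited inside the helper

    with pytest.raises(ProxyException):
        await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
            e=HTTPException(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS, detail="rate limited"
            ),
            data={},  # no litellm_logging_obj -> call_id falls back to None
            user_api_key_dict=UserAPIKeyAuth(api_key="sk-1234"),
            proxy_logging_obj=proxy_logging_obj,
        )

    # the helper reports the failure to the proxy's logging hooks before raising
    proxy_logging_obj.post_call_failure_hook.assert_awaited_once()
```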
litellm/proxy/proxy_server.py

@@ -3509,55 +3509,11 @@ async def chat_completion(  # noqa: PLR0915
             _chat_response.usage = _usage  # type: ignore
         return _chat_response
     except Exception as e:
-        verbose_proxy_logger.exception(
-            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
-        )
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
-        )
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
-        )
-
-        timeout = getattr(
-            e, "timeout", None
-        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
-        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
-            "litellm_logging_obj", None
-        )
-        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
-            user_api_key_dict=user_api_key_dict,
-            call_id=(
-                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
-            ),
-            version=version,
-            response_cost=0,
-            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-            request_data=data,
-            timeout=timeout,
-        )
-        headers = getattr(e, "headers", {}) or {}
-        headers.update(custom_headers)
-
-        if isinstance(e, HTTPException):
-            raise ProxyException(
-                message=getattr(e, "detail", str(e)),
-                type=getattr(e, "type", "None"),
-                param=getattr(e, "param", "None"),
-                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
-                headers=headers,
-            )
-        error_msg = f"{str(e)}"
-        raise ProxyException(
-            message=getattr(e, "message", error_msg),
-            type=getattr(e, "type", "None"),
-            param=getattr(e, "param", "None"),
-            openai_code=getattr(e, "code", None),
-            code=getattr(e, "status_code", 500),
-            headers=headers,
-        )
+        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
+            e=e,
+            data=data,
+            user_api_key_dict=user_api_key_dict,
+            proxy_logging_obj=proxy_logging_obj,
+        )
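The bulky except block in `chat_completion` collapses to a single delegation; since the helper always ends by raising a `ProxyException`, the outer `raise` mainly signals to readers and type checkers that this branch never returns. Because failures now funnel through one place, callers of the proxy should keep seeing OpenAI-style error bodies regardless of which route raised. A hedged illustration from the client's side: the port, the `sk-1234` key, and the exact error-payload shape are assumptions drawn from the curl example and the `ProxyException` fields in this diff, not something the commit documents.

```python
# Hypothetical sketch: poke the proxy with a model that is presumably not configured
# and inspect the OpenAI-compatible error produced via ProxyException.
import httpx

resp = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},  # key from the docstring example below
    json={"model": "does-not-exist", "messages": [{"role": "user", "content": "hi"}]},
)

print(resp.status_code)          # HTTP status propagated from ProxyException.code
print(resp.json().get("error"))  # expected to carry message / type / param / code fields
```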
litellm/proxy/response_api_endpoints/endpoints.py (new file, +88 lines)

import backoff
from fastapi import APIRouter, Depends, Request, Response

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import *
from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
from litellm.proxy.proxy_server import _read_request_body, select_data_generator

router = APIRouter()


@router.post(
    "/v1/responses",
    dependencies=[Depends(user_api_key_auth)],
    tags=["responses"],
)
@router.post(
    "/responses",
    dependencies=[Depends(user_api_key_auth)],
    tags=["responses"],
)
@backoff.on_exception(
    backoff.expo,
    Exception,  # base exception to catch for the backoff
    logger=verbose_proxy_logger,
)
async def responses_api(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses

    ```bash
    curl -X POST http://localhost:4000/v1/responses \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer sk-1234" \
        -d '{
            "model": "gpt-4o",
            "input": "Tell me about AI"
        }'
    ```
    """
    from litellm.proxy.proxy_server import (
        general_settings,
        llm_router,
        proxy_config,
        proxy_logging_obj,
        user_api_base,
        user_max_tokens,
        user_model,
        user_request_timeout,
        user_temperature,
        version,
    )

    data = {}
    try:
        data = await _read_request_body(request=request)
        return await ProxyBaseLLMRequestProcessing.base_process_llm_request(
            data=data,
            request=request,
            fastapi_response=fastapi_response,
            user_api_key_dict=user_api_key_dict,
            route_type="aresponses",
            proxy_logging_obj=proxy_logging_obj,
            llm_router=llm_router,
            general_settings=general_settings,
            proxy_config=proxy_config,
            select_data_generator=select_data_generator,
            model=None,
            user_model=user_model,
            user_temperature=user_temperature,
            user_request_timeout=user_request_timeout,
            user_max_tokens=user_max_tokens,
            user_api_base=user_api_base,
            version=version,
        )
    except Exception as e:
        raise await ProxyBaseLLMRequestProcessing._handle_llm_api_exception(
            e=e,
            data=data,
            user_api_key_dict=user_api_key_dict,
            proxy_logging_obj=proxy_logging_obj,
            version=version,
        )
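For completeness, here is the same request as the curl docstring above, issued through the OpenAI Python SDK pointed at the proxy. This is a hedged sketch, not part of the commit: it assumes an SDK version that ships the Responses client (roughly openai>=1.66) and reuses the localhost:4000 address and sk-1234 key from the example.

```python
# Hypothetical client sketch: call the new /v1/responses route on a locally running
# LiteLLM proxy through the OpenAI SDK.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy from the curl example above
    api_key="sk-1234",                 # proxy virtual key, not an OpenAI key
)

response = client.responses.create(
    model="gpt-4o",
    input="Tell me about AI",
)

print(response.output_text)  # convenience accessor for the aggregated text output
```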