Realtime API Cost tracking (#9795)

* fix(proxy_server.py): log realtime calls to spendlogs

Fixes https://github.com/BerriAI/litellm/issues/8410

* feat(realtime/): OpenAI Realtime API cost tracking

Closes https://github.com/BerriAI/litellm/issues/8410

* test: add unit testing for coverage

* test: add more unit testing

* fix: handle edge cases
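
To make the "OpenAI Realtime API cost tracking" item above concrete: usage reported on the realtime event stream has to be aggregated and priced before it can be written to SpendLogs. The sketch below is illustrative only and is not the code in this PR; it assumes OpenAI-style `response.done` events carrying `response.usage.input_tokens` / `output_tokens`, and it uses litellm's public `cost_per_token` helper. The event parsing, the helper function name, and the default model name are assumptions for illustration.

```python
# Illustrative sketch only -- not the implementation added in this PR.
# Assumes OpenAI Realtime "response.done" events that carry token usage, and that the
# chosen model name exists in litellm's pricing map.
from typing import Iterable

import litellm


def estimate_realtime_spend(
    events: Iterable[dict], model: str = "gpt-4o-realtime-preview"
) -> float:
    """Aggregate token usage across realtime events and convert it to a USD figure."""
    prompt_tokens = 0
    completion_tokens = 0
    for event in events:
        if event.get("type") == "response.done":
            usage = (event.get("response") or {}).get("usage") or {}
            prompt_tokens += usage.get("input_tokens", 0)
            completion_tokens += usage.get("output_tokens", 0)

    # litellm.cost_per_token returns (prompt_cost, completion_cost) in USD.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model=model,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    return prompt_cost + completion_cost
```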
Author: Krish Dholakia
Date: 2025-04-07 16:43:12 -07:00 (committed by GitHub)
Parent: 9a60cd9deb
Commit: 4a128cfd64
12 changed files with 401 additions and 39 deletions

@@ -2,7 +2,7 @@ import asyncio
 import json
 import uuid
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Tuple, Union
 
 import httpx
 from fastapi import HTTPException, Request, status
@@ -101,33 +101,22 @@ class ProxyBaseLLMRequestProcessing:
             verbose_proxy_logger.error(f"Error setting custom headers: {e}")
             return {}
 
-    async def base_process_llm_request(
+    async def common_processing_pre_call_logic(
         self,
         request: Request,
-        fastapi_response: Response,
-        user_api_key_dict: UserAPIKeyAuth,
-        route_type: Literal["acompletion", "aresponses"],
-        proxy_logging_obj: ProxyLogging,
         general_settings: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
         proxy_config: ProxyConfig,
-        select_data_generator: Callable,
-        llm_router: Optional[Router] = None,
-        model: Optional[str] = None,
+        route_type: Literal["acompletion", "aresponses", "_arealtime"],
+        version: Optional[str] = None,
         user_model: Optional[str] = None,
         user_temperature: Optional[float] = None,
         user_request_timeout: Optional[float] = None,
         user_max_tokens: Optional[int] = None,
         user_api_base: Optional[str] = None,
-        version: Optional[str] = None,
-    ) -> Any:
-        """
-        Common request processing logic for both chat completions and responses API endpoints
-        """
-        verbose_proxy_logger.debug(
-            "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
-        )
-
+        model: Optional[str] = None,
+    ) -> Tuple[dict, LiteLLMLoggingObj]:
         self.data = await add_litellm_data_to_request(
             data=self.data,
             request=request,
@@ -182,13 +171,57 @@
         self.data["litellm_logging_obj"] = logging_obj
+        return self.data, logging_obj
 
+    async def base_process_llm_request(
+        self,
+        request: Request,
+        fastapi_response: Response,
+        user_api_key_dict: UserAPIKeyAuth,
+        route_type: Literal["acompletion", "aresponses", "_arealtime"],
+        proxy_logging_obj: ProxyLogging,
+        general_settings: dict,
+        proxy_config: ProxyConfig,
+        select_data_generator: Callable,
+        llm_router: Optional[Router] = None,
+        model: Optional[str] = None,
+        user_model: Optional[str] = None,
+        user_temperature: Optional[float] = None,
+        user_request_timeout: Optional[float] = None,
+        user_max_tokens: Optional[int] = None,
+        user_api_base: Optional[str] = None,
+        version: Optional[str] = None,
+    ) -> Any:
+        """
+        Common request processing logic for both chat completions and responses API endpoints
+        """
+        verbose_proxy_logger.debug(
+            "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
+        )
+
+        self.data, logging_obj = await self.common_processing_pre_call_logic(
+            request=request,
+            general_settings=general_settings,
+            proxy_logging_obj=proxy_logging_obj,
+            user_api_key_dict=user_api_key_dict,
+            version=version,
+            proxy_config=proxy_config,
+            user_model=user_model,
+            user_temperature=user_temperature,
+            user_request_timeout=user_request_timeout,
+            user_max_tokens=user_max_tokens,
+            user_api_base=user_api_base,
+            model=model,
+            route_type=route_type,
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
                 data=self.data,
                 user_api_key_dict=user_api_key_dict,
                 call_type=ProxyBaseLLMRequestProcessing._get_pre_call_type(
-                    route_type=route_type
+                    route_type=route_type  # type: ignore
                 ),
             )
         )
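
The net effect of the refactor above is that the pre-call enrichment and logging setup now lives in `common_processing_pre_call_logic()`, which any route (including the new `_arealtime` route type) can call to get back the enriched request data plus the logging object that later drives spend logging. A hypothetical caller might look like the sketch below; the wrapper function, the parameter plumbing, and the untyped placeholders are assumptions for illustration, and only the method signature mirrors the diff.

```python
# Hypothetical caller -- not code from this commit. Shows how a realtime route could
# reuse the extracted pre-call logic so realtime calls share the same hooks/logging
# path (and therefore spend tracking) as chat completions.
from typing import Any, Optional, Tuple

from fastapi import Request


async def prepare_realtime_call(
    processor: Any,          # a ProxyBaseLLMRequestProcessing instance holding the request data
    request: Request,
    user_api_key_dict: Any,  # UserAPIKeyAuth
    proxy_logging_obj: Any,  # ProxyLogging
    proxy_config: Any,       # ProxyConfig
    general_settings: dict,
    version: Optional[str] = None,
    model: Optional[str] = None,
) -> Tuple[dict, Any]:
    # "_arealtime" is the new route_type literal added in this commit; routing realtime
    # traffic through the shared logic is what gives it a logging object for SpendLogs.
    return await processor.common_processing_pre_call_logic(
        request=request,
        general_settings=general_settings,
        user_api_key_dict=user_api_key_dict,
        proxy_logging_obj=proxy_logging_obj,
        proxy_config=proxy_config,
        route_type="_arealtime",
        version=version,
        model=model,
    )
```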