Response API cost tracking

Ishaan Jaff 2025-03-11 22:02:14 -07:00
parent 8fa313ab07
commit 24cb83b0e4
3 changed files with 65 additions and 5 deletions

View file

@@ -39,11 +39,13 @@ from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_custom_logger,
     redact_message_input_output_from_logging,
 )
+from litellm.responses.utils import ResponseAPILoggingUtils
 from litellm.types.llms.openai import (
     AllMessageValues,
     Batch,
     FineTuningJob,
     HttpxBinaryResponseContent,
+    ResponsesAPIResponse,
 )
 from litellm.types.rerank import RerankResponse
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -851,6 +853,7 @@ class Logging(LiteLLMLoggingBaseClass):
             RerankResponse,
             Batch,
             FineTuningJob,
+            ResponsesAPIResponse,
         ],
         cache_hit: Optional[bool] = None,
     ) -> Optional[float]:
@@ -3111,6 +3114,12 @@ class StandardLoggingPayloadSetup:
         elif isinstance(usage, Usage):
             return usage
         elif isinstance(usage, dict):
+            if ResponseAPILoggingUtils._is_response_api_usage(usage):
+                return (
+                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                        usage
+                    )
+                )
             return Usage(**usage)
         raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
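
A minimal standalone sketch of what the new dict branch above does, using only the standard library; ChatUsage here is a stand-in for litellm.types.utils.Usage so the example runs without importing litellm, and normalize_usage is a hypothetical name for illustration only.

# Standalone sketch (not litellm code): mirrors the dict branch added above.
from dataclasses import dataclass


@dataclass
class ChatUsage:  # stand-in for litellm.types.utils.Usage
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


def normalize_usage(usage: dict) -> ChatUsage:
    # Responses API payloads report input_tokens/output_tokens instead of
    # prompt_tokens/completion_tokens; detect that shape and remap it.
    if "input_tokens" in usage and "output_tokens" in usage:
        prompt = usage.get("input_tokens") or 0
        completion = usage.get("output_tokens") or 0
        return ChatUsage(prompt, completion, prompt + completion)
    # Chat Completions-style usage already matches the target field names.
    return ChatUsage(
        prompt_tokens=usage.get("prompt_tokens", 0),
        completion_tokens=usage.get("completion_tokens", 0),
        total_tokens=usage.get("total_tokens", 0),
    )


print(normalize_usage({"input_tokens": 100, "output_tokens": 25, "total_tokens": 125}))
# ChatUsage(prompt_tokens=100, completion_tokens=25, total_tokens=125)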

View file

@@ -4,9 +4,11 @@ from typing import Any, Dict
 import litellm
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
+    ResponseAPIUsage,
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIRequestParams,
 )
+from litellm.types.utils import Usage


 def get_optional_params_responses_api(
@@ -49,3 +51,24 @@ def get_optional_params_responses_api(
     )

     return mapped_params
+
+
+class ResponseAPILoggingUtils:
+    @staticmethod
+    def _is_response_api_usage(usage: dict) -> bool:
+        """Returns True if usage is from OpenAI Response API"""
+        if "input_tokens" in usage and "output_tokens" in usage:
+            return True
+        return False
+
+    @staticmethod
+    def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
+        """Transforms the ResponseAPIUsage object to a Usage object"""
+        response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
+        prompt_tokens: int = response_api_usage.input_tokens or 0
+        completion_tokens: int = response_api_usage.output_tokens or 0
+        return Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
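
The helpers added above can be exercised directly. The sketch below assumes a litellm install that already contains this change; the sample usage dict is illustrative and includes output_tokens_details and total_tokens because ResponseAPIUsage declares them as required fields.

# Illustrative only; assumes a litellm tree that includes this commit.
from litellm.responses.utils import ResponseAPILoggingUtils

# Sample Responses API usage block (shape follows ResponseAPIUsage above).
usage = {
    "input_tokens": 120,
    "output_tokens": 40,
    "output_tokens_details": {"reasoning_tokens": 0},
    "total_tokens": 160,
}

if ResponseAPILoggingUtils._is_response_api_usage(usage):
    chat_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(usage)
    # chat_usage.prompt_tokens == 120, chat_usage.completion_tokens == 40,
    # chat_usage.total_tokens == 160
    print(chat_usage)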

View file

@@ -38,7 +38,6 @@ from openai.types.responses.response import (
     Response,
     ResponseOutputItem,
     ResponseTextConfig,
-    ResponseUsage,
     Tool,
     ToolChoice,
 )
@@ -50,7 +49,7 @@ from openai.types.responses.response_create_params import (
     ToolChoice,
     ToolParam,
 )
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, PrivateAttr
 from typing_extensions import Dict, Required, TypedDict, override

 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -733,7 +732,25 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
     model: str


-class ResponsesAPIResponse(TypedDict, total=False):
+class OutputTokensDetails(BaseModel):
+    reasoning_tokens: int
+
+
+class ResponseAPIUsage(BaseModel):
+    input_tokens: int
+    """The number of input tokens."""
+
+    output_tokens: int
+    """The number of output tokens."""
+
+    output_tokens_details: OutputTokensDetails
+    """A detailed breakdown of the output tokens."""
+
+    total_tokens: int
+    """The total number of tokens used."""
+
+
+class ResponsesAPIResponse(BaseModel):
     id: str
     created_at: float
     error: Optional[dict]
@@ -754,10 +771,21 @@ class ResponsesAPIResponse(TypedDict, total=False):
     status: Optional[str]
     text: Optional[ResponseTextConfig]
     truncation: Optional[Literal["auto", "disabled"]]
-    usage: Optional[ResponseUsage]
+    usage: Optional[ResponseAPIUsage]
     user: Optional[str]
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__


-class ResponsesAPIStreamingResponse(TypedDict, total=False):
+class ResponsesAPIStreamingResponse(BaseModel):
     type: str
     response: ResponsesAPIResponse
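
A minimal sketch of the pattern used above, assuming only that pydantic is installed: MiniResponse is a hypothetical stand-in for ResponsesAPIResponse (which carries many more fields) and shows how the dict-style access that callers relied on when the type was a TypedDict keeps working after the switch to BaseModel, while PrivateAttr holds extra metadata outside the serialized fields.

# Sketch of the TypedDict-compatibility pattern; not the real ResponsesAPIResponse.
from pydantic import BaseModel, PrivateAttr


class MiniResponse(BaseModel):
    id: str
    model: str

    # Private attribute: not a model field, so it stays out of model_dump().
    _hidden_params: dict = PrivateAttr(default_factory=dict)

    def __getitem__(self, key):
        return self.__dict__[key]

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):
        return key in self.__dict__


resp = MiniResponse(id="resp_123", model="gpt-4o")
assert resp["id"] == "resp_123"      # subscript access still works
assert resp.get("missing") is None   # dict-style .get with a default
assert "model" in resp               # membership test via __contains__
resp._hidden_params["response_cost"] = 0.000123  # metadata kept off the serialized model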