Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

Response API cost tracking

This commit is contained in:
parent 8fa313ab07
commit 24cb83b0e4

3 changed files with 65 additions and 5 deletions
@@ -39,11 +39,13 @@ from litellm.litellm_core_utils.redact_messages import (
    redact_message_input_output_from_custom_logger,
    redact_message_input_output_from_logging,
)
from litellm.responses.utils import ResponseAPILoggingUtils
from litellm.types.llms.openai import (
    AllMessageValues,
    Batch,
    FineTuningJob,
    HttpxBinaryResponseContent,
    ResponsesAPIResponse,
)
from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -851,6 +853,7 @@ class Logging(LiteLLMLoggingBaseClass):
            RerankResponse,
            Batch,
            FineTuningJob,
            ResponsesAPIResponse,
        ],
        cache_hit: Optional[bool] = None,
    ) -> Optional[float]:
@@ -3111,6 +3114,12 @@ class StandardLoggingPayloadSetup:
        elif isinstance(usage, Usage):
            return usage
        elif isinstance(usage, dict):
            if ResponseAPILoggingUtils._is_response_api_usage(usage):
                return (
                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
                        usage
                    )
                )
            return Usage(**usage)

        raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
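For context: the Chat Completions API and the Responses API report token counts under different keys, which is why the dict branch above detects the Responses API shape before falling back to Usage(**usage). A minimal, self-contained sketch of the two shapes (field names follow the OpenAI APIs; values are made up):

# Chat Completions style usage -> handled by the `return Usage(**usage)` fallback.
chat_usage = {
    "prompt_tokens": 120,
    "completion_tokens": 30,
    "total_tokens": 150,
}

# Responses API style usage -> detected and transformed into a chat Usage object.
responses_usage = {
    "input_tokens": 120,
    "output_tokens": 30,
    "output_tokens_details": {"reasoning_tokens": 0},
    "total_tokens": 150,
}

def looks_like_responses_api(usage: dict) -> bool:
    # Same heuristic the new helper uses: the Responses API renames the token fields.
    return "input_tokens" in usage and "output_tokens" in usage

assert not looks_like_responses_api(chat_usage)
assert looks_like_responses_api(responses_usage)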
@@ -4,9 +4,11 @@ from typing import Any, Dict
import litellm
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
    ResponseAPIUsage,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIRequestParams,
)
from litellm.types.utils import Usage


def get_optional_params_responses_api(
@@ -49,3 +51,24 @@ def get_optional_params_responses_api(
    )

    return mapped_params


class ResponseAPILoggingUtils:
    @staticmethod
    def _is_response_api_usage(usage: dict) -> bool:
        """Returns True if usage is from the OpenAI Responses API."""
        if "input_tokens" in usage and "output_tokens" in usage:
            return True
        return False

    @staticmethod
    def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
        """Transforms a Responses API usage dict into a chat-completions Usage object."""
        response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
        prompt_tokens: int = response_api_usage.input_tokens or 0
        completion_tokens: int = response_api_usage.output_tokens or 0
        return Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
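A quick illustration of the new helpers, using the import path shown in the diff; the usage dict below is a made-up example of a Responses API usage payload, assuming the ResponseAPIUsage field shape added in this commit:

from litellm.responses.utils import ResponseAPILoggingUtils

# Illustrative Responses API usage payload.
usage = {
    "input_tokens": 250,
    "output_tokens": 80,
    "output_tokens_details": {"reasoning_tokens": 0},
    "total_tokens": 330,
}

if ResponseAPILoggingUtils._is_response_api_usage(usage):
    chat_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(usage)
    # chat_usage is a litellm Usage object with the familiar chat-completions fields.
    print(chat_usage.prompt_tokens, chat_usage.completion_tokens, chat_usage.total_tokens)
    # 250 80 330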
@@ -38,7 +38,6 @@ from openai.types.responses.response import (
    Response,
    ResponseOutputItem,
    ResponseTextConfig,
    ResponseUsage,
    Tool,
    ToolChoice,
)
@@ -50,7 +49,7 @@ from openai.types.responses.response_create_params import (
    ToolChoice,
    ToolParam,
)
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, PrivateAttr
from typing_extensions import Dict, Required, TypedDict, override

FileContent = Union[IO[bytes], bytes, PathLike]
@@ -733,7 +732,25 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
    model: str


class ResponsesAPIResponse(TypedDict, total=False):
class OutputTokensDetails(BaseModel):
    reasoning_tokens: int


class ResponseAPIUsage(BaseModel):
    input_tokens: int
    """The number of input tokens."""

    output_tokens: int
    """The number of output tokens."""

    output_tokens_details: OutputTokensDetails
    """A detailed breakdown of the output tokens."""

    total_tokens: int
    """The total number of tokens used."""


class ResponsesAPIResponse(BaseModel):
    id: str
    created_at: float
    error: Optional[dict]
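Side note on the ResponseAPIUsage model added in the hunk above: it is a plain pydantic model, so a raw usage dict validates straight into it and the nested dict is coerced into OutputTokensDetails. A small sketch (illustrative values only):

from litellm.types.llms.openai import ResponseAPIUsage

usage = ResponseAPIUsage(
    input_tokens=250,
    output_tokens=80,
    output_tokens_details={"reasoning_tokens": 12},  # coerced into OutputTokensDetails
    total_tokens=330,
)
print(usage.output_tokens_details.reasoning_tokens)  # 12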
@@ -754,10 +771,21 @@ class ResponsesAPIResponse(TypedDict, total=False):
    status: Optional[str]
    text: Optional[ResponseTextConfig]
    truncation: Optional[Literal["auto", "disabled"]]
    usage: Optional[ResponseUsage]
    usage: Optional[ResponseAPIUsage]
    user: Optional[str]
    # Define private attributes using PrivateAttr
    _hidden_params: dict = PrivateAttr(default_factory=dict)

    def __getitem__(self, key):
        return self.__dict__[key]

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):
        return key in self.__dict__


class ResponsesAPIStreamingResponse(TypedDict, total=False):
class ResponsesAPIStreamingResponse(BaseModel):
    type: str
    response: ResponsesAPIResponse
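Because ResponsesAPIResponse changes here from a TypedDict to a BaseModel, the __getitem__ / get / __contains__ shims above keep existing dict-style callers working. A minimal sketch of the pattern, using a stand-in model rather than the real class (which has many more fields than the diff shows):

from pydantic import BaseModel, PrivateAttr

class DictLikeModel(BaseModel):
    # Stand-in mirroring the dict-style shims added to ResponsesAPIResponse.
    id: str
    total_tokens: int
    _hidden_params: dict = PrivateAttr(default_factory=dict)

    def __getitem__(self, key):
        return self.__dict__[key]

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):
        return key in self.__dict__

m = DictLikeModel(id="resp_123", total_tokens=330)
assert "total_tokens" in m        # __contains__
assert m["id"] == "resp_123"      # __getitem__
assert m.get("missing", 0) == 0   # .get() with a default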