mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Response API cost tracking
This commit is contained in:
parent
8fa313ab07
commit
24cb83b0e4
3 changed files with 65 additions and 5 deletions
|
@ -39,11 +39,13 @@ from litellm.litellm_core_utils.redact_messages import (
|
||||||
redact_message_input_output_from_custom_logger,
|
redact_message_input_output_from_custom_logger,
|
||||||
redact_message_input_output_from_logging,
|
redact_message_input_output_from_logging,
|
||||||
)
|
)
|
||||||
|
from litellm.responses.utils import ResponseAPILoggingUtils
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
Batch,
|
Batch,
|
||||||
FineTuningJob,
|
FineTuningJob,
|
||||||
HttpxBinaryResponseContent,
|
HttpxBinaryResponseContent,
|
||||||
|
ResponsesAPIResponse,
|
||||||
)
|
)
|
||||||
from litellm.types.rerank import RerankResponse
|
from litellm.types.rerank import RerankResponse
|
||||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||||
|
@ -851,6 +853,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||||
RerankResponse,
|
RerankResponse,
|
||||||
Batch,
|
Batch,
|
||||||
FineTuningJob,
|
FineTuningJob,
|
||||||
|
ResponsesAPIResponse,
|
||||||
],
|
],
|
||||||
cache_hit: Optional[bool] = None,
|
cache_hit: Optional[bool] = None,
|
||||||
) -> Optional[float]:
|
) -> Optional[float]:
|
||||||
|
@ -3111,6 +3114,12 @@ class StandardLoggingPayloadSetup:
|
||||||
elif isinstance(usage, Usage):
|
elif isinstance(usage, Usage):
|
||||||
return usage
|
return usage
|
||||||
elif isinstance(usage, dict):
|
elif isinstance(usage, dict):
|
||||||
|
if ResponseAPILoggingUtils._is_response_api_usage(usage):
|
||||||
|
return (
|
||||||
|
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||||
|
usage
|
||||||
|
)
|
||||||
|
)
|
||||||
return Usage(**usage)
|
return Usage(**usage)
|
||||||
|
|
||||||
raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
|
raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
|
||||||
|
|
|
@ -4,9 +4,11 @@ from typing import Any, Dict
|
||||||
import litellm
|
import litellm
|
||||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
|
ResponseAPIUsage,
|
||||||
ResponsesAPIOptionalRequestParams,
|
ResponsesAPIOptionalRequestParams,
|
||||||
ResponsesAPIRequestParams,
|
ResponsesAPIRequestParams,
|
||||||
)
|
)
|
||||||
|
from litellm.types.utils import Usage
|
||||||
|
|
||||||
|
|
||||||
def get_optional_params_responses_api(
|
def get_optional_params_responses_api(
|
||||||
|
@ -49,3 +51,24 @@ def get_optional_params_responses_api(
|
||||||
)
|
)
|
||||||
|
|
||||||
return mapped_params
|
return mapped_params
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseAPILoggingUtils:
    """Helpers used when logging / cost-tracking Responses API calls."""

    @staticmethod
    def _is_response_api_usage(usage: dict) -> bool:
        """Return True if ``usage`` uses the Responses API token field names.

        A usage dict is recognised purely by the presence of both the
        ``input_tokens`` and ``output_tokens`` keys; no other key is inspected.
        """
        return "input_tokens" in usage and "output_tokens" in usage

    @staticmethod
    def _transform_response_api_usage_to_chat_usage(usage: dict) -> "Usage":
        """Transform a Responses API usage dict into a chat-completion ``Usage``.

        Args:
            usage: A dict carrying ``input_tokens`` / ``output_tokens``
                (i.e. one accepted by ``_is_response_api_usage``).

        Returns:
            A ``Usage`` whose ``prompt_tokens`` is the input-token count,
            ``completion_tokens`` is the output-token count and
            ``total_tokens`` is their sum. Missing or ``None`` counters are
            treated as 0.

        Raises:
            ValueError / TypeError: if a counter is present but cannot be
                converted to an integer.
        """
        # Read the two counters straight from the dict instead of first
        # building a ResponseAPIUsage model. That model also requires
        # ``output_tokens_details`` and ``total_tokens``, so validating the
        # whole dict would raise for payloads that _is_response_api_usage
        # accepts but that omit those fields - even though neither field is
        # used in this conversion.
        prompt_tokens: int = int(usage.get("input_tokens") or 0)
        completion_tokens: int = int(usage.get("output_tokens") or 0)

        return Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
|
||||||
|
|
|
@ -38,7 +38,6 @@ from openai.types.responses.response import (
|
||||||
Response,
|
Response,
|
||||||
ResponseOutputItem,
|
ResponseOutputItem,
|
||||||
ResponseTextConfig,
|
ResponseTextConfig,
|
||||||
ResponseUsage,
|
|
||||||
Tool,
|
Tool,
|
||||||
ToolChoice,
|
ToolChoice,
|
||||||
)
|
)
|
||||||
|
@ -50,7 +49,7 @@ from openai.types.responses.response_create_params import (
|
||||||
ToolChoice,
|
ToolChoice,
|
||||||
ToolParam,
|
ToolParam,
|
||||||
)
|
)
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field, PrivateAttr
|
||||||
from typing_extensions import Dict, Required, TypedDict, override
|
from typing_extensions import Dict, Required, TypedDict, override
|
||||||
|
|
||||||
FileContent = Union[IO[bytes], bytes, PathLike]
|
FileContent = Union[IO[bytes], bytes, PathLike]
|
||||||
|
@ -733,7 +732,25 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
|
||||||
model: str
|
model: str
|
||||||
|
|
||||||
|
|
||||||
class ResponsesAPIResponse(TypedDict, total=False):
|
class OutputTokensDetails(BaseModel):
    # Breakdown of output tokens; the only field declared here is the
    # reasoning-token count.
    reasoning_tokens: int
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseAPIUsage(BaseModel):
    # Token accounting attached to a Responses API response
    # (the type of ``ResponsesAPIResponse.usage``).

    # The number of input tokens.
    input_tokens: int

    # The number of output tokens.
    output_tokens: int

    # A detailed breakdown of the output tokens.
    output_tokens_details: OutputTokensDetails

    # The total number of tokens used.
    total_tokens: int
|
||||||
|
|
||||||
|
|
||||||
|
class ResponsesAPIResponse(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
created_at: float
|
created_at: float
|
||||||
error: Optional[dict]
|
error: Optional[dict]
|
||||||
|
@ -754,10 +771,21 @@ class ResponsesAPIResponse(TypedDict, total=False):
|
||||||
status: Optional[str]
|
status: Optional[str]
|
||||||
text: Optional[ResponseTextConfig]
|
text: Optional[ResponseTextConfig]
|
||||||
truncation: Optional[Literal["auto", "disabled"]]
|
truncation: Optional[Literal["auto", "disabled"]]
|
||||||
usage: Optional[ResponseUsage]
|
usage: Optional[ResponseAPIUsage]
|
||||||
user: Optional[str]
|
user: Optional[str]
|
||||||
|
# Define private attributes using PrivateAttr
|
||||||
|
_hidden_params: dict = PrivateAttr(default_factory=dict)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return self.__dict__[key]
|
||||||
|
|
||||||
|
def get(self, key, default=None):
|
||||||
|
return self.__dict__.get(key, default)
|
||||||
|
|
||||||
|
def __contains__(self, key):
|
||||||
|
return key in self.__dict__
|
||||||
|
|
||||||
|
|
||||||
class ResponsesAPIStreamingResponse(TypedDict, total=False):
|
class ResponsesAPIStreamingResponse(BaseModel):
|
||||||
type: str
|
type: str
|
||||||
response: ResponsesAPIResponse
|
response: ResponsesAPIResponse
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue