litellm-mirror/litellm/responses/utils.py
Krish Dholakia 5ac61a7572
Add bedrock latency optimized inference support (#9623)
* fix(converse_transformation.py): add performanceConfig param support on bedrock

Closes https://github.com/BerriAI/litellm/issues/7606

* fix(converse_transformation.py): refactor to use more flexible single getter for params which are separate config blocks

* test(test_main.py): add e2e mock test for bedrock performance config

* build(model_prices_and_context_window.json): add versioned multimodal embedding

* refactor(multimodal_embeddings/): migrate to config pattern

* feat(vertex_ai/multimodalembeddings): calculate usage for multimodal embedding calls

Enables cost calculation for multimodal embeddings

* feat(vertex_ai/multimodalembeddings): get usage object for embedding calls

ensures accurate cost tracking for vertexai multimodal embedding calls

* fix(embedding_handler.py): remove unused imports

* fix: fix linting errors

* fix: handle response api usage calculation

* test(test_vertex_ai_multimodal_embedding_transformation.py): update tests

* test: mark flaky test

* feat(vertex_ai/multimodal_embeddings/transformation.py): support text+image+video input

* docs(vertex.md): document sending text + image to vertex multimodal embeddings

* test: remove incorrect file

* fix(multimodal_embeddings/transformation.py): fix linting error

* style: remove unused import
2025-03-29 00:23:09 -07:00

103 lines
3.6 KiB
Python

from typing import Any, Dict, Union, cast, get_type_hints
import litellm
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponseAPIUsage,
ResponsesAPIOptionalRequestParams,
)
from litellm.types.utils import Usage
class ResponsesAPIRequestUtils:
"""Helper utils for constructing ResponseAPI requests"""
@staticmethod
def get_optional_params_responses_api(
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
) -> Dict:
"""
Get optional parameters for the responses API.
Args:
params: Dictionary of all parameters
model: The model name
responses_api_provider_config: The provider configuration for responses API
Returns:
A dictionary of supported parameters for the responses API
"""
# Remove None values and internal parameters
# Get supported parameters for the model
supported_params = responses_api_provider_config.get_supported_openai_params(
model
)
# Check for unsupported parameters
unsupported_params = [
param
for param in response_api_optional_params
if param not in supported_params
]
if unsupported_params:
raise litellm.UnsupportedParamsError(
model=model,
message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
)
# Map parameters to provider-specific format
mapped_params = responses_api_provider_config.map_openai_params(
response_api_optional_params=response_api_optional_params,
model=model,
drop_params=litellm.drop_params,
)
return mapped_params
@staticmethod
def get_requested_response_api_optional_param(
params: Dict[str, Any]
) -> ResponsesAPIOptionalRequestParams:
"""
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
Args:
params: Dictionary of parameters to filter
Returns:
ResponsesAPIOptionalRequestParams instance with only the valid parameters
"""
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
filtered_params = {k: v for k, v in params.items() if k in valid_keys}
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
class ResponseAPILoggingUtils:
@staticmethod
def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
"""returns True if usage is from OpenAI Response API"""
if isinstance(usage, ResponseAPIUsage):
return True
if "input_tokens" in usage and "output_tokens" in usage:
return True
return False
@staticmethod
def _transform_response_api_usage_to_chat_usage(
usage: Union[dict, ResponseAPIUsage]
) -> Usage:
"""Tranforms the ResponseAPIUsage object to a Usage object"""
response_api_usage: ResponseAPIUsage = (
ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
)
prompt_tokens: int = response_api_usage.input_tokens or 0
completion_tokens: int = response_api_usage.output_tokens or 0
return Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)