Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Add bedrock latency optimized inference support (#9623)
* fix(converse_transformation.py): add performanceConfig param support on bedrock
  Closes https://github.com/BerriAI/litellm/issues/7606
* fix(converse_transformation.py): refactor to use a more flexible single getter for params which are separate config blocks
* test(test_main.py): add e2e mock test for bedrock performance config
* build(model_prices_and_context_window.json): add versioned multimodal embedding
* refactor(multimodal_embeddings/): migrate to config pattern
* feat(vertex_ai/multimodalembeddings): calculate usage for multimodal embedding calls
  Enables cost calculation for multimodal embeddings
* feat(vertex_ai/multimodalembeddings): get usage object for embedding calls
  Ensures accurate cost tracking for vertexai multimodal embedding calls
* fix(embedding_handler.py): remove unused imports
* fix: fix linting errors
* fix: handle response api usage calculation
* test(test_vertex_ai_multimodal_embedding_transformation.py): update tests
* test: mark flaky test
* feat(vertex_ai/multimodal_embeddings/transformation.py): support text+image+video input
* docs(vertex.md): document sending text + image to vertex multimodal embeddings
* test: remove incorrect file
* fix(multimodal_embeddings/transformation.py): fix linting error
* style: remove unused import
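The headline change is the performanceConfig pass-through for Bedrock latency-optimized inference. Below is a minimal sketch of what a call could look like after this change; the model id and the {"latency": "optimized"} payload shape follow the Bedrock Converse API and are assumptions for illustration, not taken from this commit's diff.

    # Hypothetical usage sketch: pass performanceConfig through litellm.completion
    # so the request is forwarded to Bedrock's Converse API with latency-optimized
    # inference enabled. Model id and payload shape are assumptions.
    import litellm

    response = litellm.completion(
        model="bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0",  # assumed model id
        messages=[{"role": "user", "content": "Summarize this incident in one sentence."}],
        performanceConfig={"latency": "optimized"},  # assumed payload shape, passed through to Bedrock
    )
    print(response.choices[0].message.content)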
This commit is contained in:
parent 0742e6afd6
commit 5ac61a7572
19 changed files with 806 additions and 245 deletions
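Several of the items above concern Vertex AI multimodal embeddings: usage is now calculated for those calls, and text, image, and video inputs are supported. A rough sketch of the call being enabled follows; the model id and the mixed text/image input shape are assumptions based on the commit message, not on the diff shown below.

    # Hypothetical sketch: mixed text + image embedding call whose response now
    # carries a usage object, which is what makes cost tracking possible.
    import litellm

    response = litellm.embedding(
        model="vertex_ai/multimodalembedding@001",     # assumed model id
        input=[
            "a golden retriever playing in the snow",  # text input
            "gs://my-bucket/dog.jpg",                  # image input (assumed gs:// URI form)
        ],
    )
    print(response.usage)  # usage is now populated, enabling cost calculation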
Of the 19 changed files, the diff excerpt preserved here covers the Responses API usage handling fix:

@@ -1,4 +1,4 @@
-from typing import Any, Dict, cast, get_type_hints
+from typing import Any, Dict, Union, cast, get_type_hints
 
 import litellm
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
@@ -78,16 +78,22 @@ class ResponsesAPIRequestUtils:
 
 class ResponseAPILoggingUtils:
     @staticmethod
-    def _is_response_api_usage(usage: dict) -> bool:
+    def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
         """returns True if usage is from OpenAI Response API"""
+        if isinstance(usage, ResponseAPIUsage):
+            return True
         if "input_tokens" in usage and "output_tokens" in usage:
             return True
         return False
 
     @staticmethod
-    def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
+    def _transform_response_api_usage_to_chat_usage(
+        usage: Union[dict, ResponseAPIUsage]
+    ) -> Usage:
         """Tranforms the ResponseAPIUsage object to a Usage object"""
-        response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
+        response_api_usage: ResponseAPIUsage = (
+            ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
+        )
         prompt_tokens: int = response_api_usage.input_tokens or 0
         completion_tokens: int = response_api_usage.output_tokens or 0
         return Usage(
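The net effect of the second hunk is that the usage helpers accept either a raw usage dict from the Responses API or an already-parsed ResponseAPIUsage object. A minimal sketch under assumed import paths (the symbols in the hunk suggest litellm/responses/utils.py, and ResponseAPIUsage is assumed to live in litellm's OpenAI response types):

    # Minimal sketch: both input forms now map to the same chat-style Usage.
    # Import paths and the ResponseAPIUsage field set are assumptions.
    from litellm.responses.utils import ResponseAPILoggingUtils  # assumed path
    from litellm.types.llms.openai import ResponseAPIUsage       # assumed path

    raw = {"input_tokens": 12, "output_tokens": 34, "total_tokens": 46}
    parsed = ResponseAPIUsage(input_tokens=12, output_tokens=34, total_tokens=46)

    # Before this commit, passing the parsed object would fail at
    # ResponseAPIUsage(**usage); now both paths return a Usage object.
    u1 = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(raw)
    u2 = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(parsed)
    assert u1.prompt_tokens == u2.prompt_tokens == 12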