Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722)
  * feat(new_usage.tsx): add date picker for new usage tab, allowing users to look back on their usage data
  * feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details, allowing usage tracking of how many reasoning tokens are actually being used
  * feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming responses, allowing reasoning_token usage to be tracked across providers
* Fix update team metadata + fix bulk adding models on UI (#9721)
  * fix(handle_add_model_submit.tsx): fix bulk adding models
  * fix(team_info.tsx): fix team metadata update (fixes https://github.com/BerriAI/litellm/issues/9689)
* (v0) Unified file id: allow calling multiple providers with the same file id (#9718)
  * feat(files_endpoints.py): initial commit adding 'target_model_names' support, letting developers specify all the models they want to call with the file
  * feat(files_endpoints.py): return unified files endpoint
  * test(test_files_endpoints.py): add validation test for an invalid purpose being submitted
  * feat: more updates
  * feat: initial working commit of unified file id translation
  * fix: additional fixes
  * fix(router.py): remove model replace logic in jsonl on acreate_file, enabling file upload to work for chat completion requests as well
  * fix(files_endpoints.py): remove whitespace around model name
  * fix(azure/handler.py): return acreate_file with the correct response type
  * fix: fix linting errors
  * test: fix mock test to run on GitHub Actions
  * fix: fix ruff errors
  * fix: fix file too large error
  * fix(utils.py): remove redundant var
  * test: modify test to work on GitHub Actions
  * test: update tests
  * test: more debug logs to understand the CI/CD issue
  * test: fix test for respx
  * test: skip mock respx test (fails on CI/CD, not clear why)
  * fix: fix ruff check
  * fix: fix test
  * fix(model_connection_test.tsx): fix linting error
  * test: update unit tests
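The unified file id flow from the third change above (#9718) could be exercised from a client roughly as follows. This is a hedged sketch rather than documented usage: the proxy URL, API key, model names, purpose, the extra_body shape for target_model_names, and the file-type message part are assumptions; only the parameter name target_model_names and the litellm_proxy/ managed-id prefix come from the commit log and the hook source below.

from openai import OpenAI

# Point the OpenAI SDK at a LiteLLM proxy (host and key are placeholders).
client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000/v1")

# Upload once; ask the proxy to register the file against every target model.
uploaded = client.files.create(
    file=open("report.pdf", "rb"),
    purpose="user_data",  # assumed purpose for chat-completion file use
    extra_body={"target_model_names": ["gpt-4o-mini", "gemini-2.0-flash"]},
)
print(uploaded.id)  # a litellm_proxy/ managed id rather than a provider file id

# The same managed id can then be sent to any of the target models; the hook in
# the file below swaps it for the matching provider-specific file id.
completion = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize this file."},
                {"type": "file", "file": {"file_id": uploaded.id}},
            ],
        }
    ],
)
print(completion.choices[0].message.content)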
145 lines · 5.3 KiB · Python
# What is this?
## This hook is used to check for LiteLLM managed files in the request body, and replace them with model-specific file id
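# Roughly, and purely as an illustration (the exact request/message schema is not
# shown here): a request that references a managed id such as
# "litellm_proxy/<uuid>" gets annotated by the pre-call hook with
#   data["model_file_id_mapping"] = {"litellm_proxy/<uuid>": {"<model_id>": "<provider_file_id>"}}
# so the provider-specific file id can be substituted for whichever deployment
# serves the call.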
import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Union, cast

from litellm import verbose_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
    get_file_ids_from_messages,
)
from litellm.proxy._types import CallTypes, SpecialEnums, UserAPIKeyAuth
from litellm.types.llms.openai import OpenAIFileObject, OpenAIFilesPurpose

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache

    Span = Union[_Span, Any]
    InternalUsageCache = _InternalUsageCache
else:
    Span = Any
    InternalUsageCache = Any

class _PROXY_LiteLLMManagedFiles(CustomLogger):
    # Class variables or attributes
    def __init__(self, internal_usage_cache: InternalUsageCache):
        self.internal_usage_cache = internal_usage_cache

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: Dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ) -> Union[Exception, str, Dict, None]:
        """
        - Detect litellm_proxy/ file_ids in the request messages
        - Add a dictionary mapping each litellm_proxy/ file_id to its provider-specific
          file ids => {litellm_proxy/file_id: {model_id: provider_file_id}}
        """
        if call_type == CallTypes.completion.value:
            messages = data.get("messages")
            if messages:
                file_ids = get_file_ids_from_messages(messages)
                if file_ids:
                    model_file_id_mapping = await self.get_model_file_id_mapping(
                        file_ids, user_api_key_dict.parent_otel_span
                    )
                    data["model_file_id_mapping"] = model_file_id_mapping

        return data

    async def get_model_file_id_mapping(
        self, file_ids: List[str], litellm_parent_otel_span: Span
    ) -> dict:
        """
        Get model-specific file IDs for a list of proxy file IDs.
        Returns a dictionary mapping litellm_proxy/ file_id -> model_id -> model_file_id

        1. Collect the litellm_proxy/ file_ids from the given file_ids
        2. For each managed file_id, look up its cached mapping in the internal usage cache
        3. Return a dictionary of mappings of litellm_proxy/ file_id -> model_id -> model_file_id

        Example:
        {
            "litellm_proxy/file_id": {
                "model_id": "model_file_id"
            }
        }
        """
        file_id_mapping: Dict[str, Dict[str, str]] = {}
        litellm_managed_file_ids = []

        for file_id in file_ids:
            ## CHECK IF FILE ID IS MANAGED BY LITELLM
            if file_id.startswith(SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value):
                litellm_managed_file_ids.append(file_id)

        if litellm_managed_file_ids:
            for file_id in litellm_managed_file_ids:
                # Look up the cached {model_id: provider_file_id} mapping stored under this file id
                cached_values = cast(
                    Dict[str, str],
                    await self.internal_usage_cache.async_get_cache(
                        key=file_id, litellm_parent_otel_span=litellm_parent_otel_span
                    ),
                )
                if cached_values:
                    file_id_mapping[file_id] = cached_values
        return file_id_mapping

    @staticmethod
    async def return_unified_file_id(
        file_objects: List[OpenAIFileObject],
        purpose: OpenAIFilesPurpose,
        internal_usage_cache: InternalUsageCache,
        litellm_parent_otel_span: Span,
    ) -> OpenAIFileObject:
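        """
        Mint a unified file id (LITELM_MANAGED_FILE_ID_PREFIX + a uuid4), return an
        OpenAIFileObject describing it, and cache the {model_id: provider_file_id}
        mapping for the given file_objects under that unified id so later requests
        can be translated per deployment (see get_model_file_id_mapping above).
        """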
        unified_file_id = SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value + str(
            uuid.uuid4()
        )

        ## CREATE RESPONSE OBJECT
        response = OpenAIFileObject(
            id=unified_file_id,
            object="file",
            purpose=cast(OpenAIFilesPurpose, purpose),
            created_at=file_objects[0].created_at,
            bytes=1234,
            filename=str(datetime.now().timestamp()),
            status="uploaded",
        )

        ## STORE RESPONSE IN DB + CACHE
        stored_values: Dict[str, str] = {}
        for file_object in file_objects:
            model_id = file_object._hidden_params.get("model_id")
            if model_id is None:
                verbose_logger.warning(
                    f"Skipping file_object: {file_object} because model_id in hidden_params={file_object._hidden_params} is None"
                )
                continue
            file_id = file_object.id
            stored_values[model_id] = file_id
        await internal_usage_cache.async_set_cache(
            key=unified_file_id,
            value=stored_values,
            litellm_parent_otel_span=litellm_parent_otel_span,
        )

        return response