Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)
* Add date picker to usage tab + add reasoning_content token tracking across all providers on streaming (#9722)
  * feat(new_usage.tsx): add a date picker to the new usage tab, so users can look back over their usage data
  * feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details, enabling tracking of how many reasoning tokens are actually being used
  * feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming responses, allowing reasoning_token usage tracking across providers
* Fix update team metadata + fix bulk adding models on UI (#9721)
  * fix(handle_add_model_submit.tsx): fix bulk adding models
  * fix(team_info.tsx): fix team metadata update. Fixes https://github.com/BerriAI/litellm/issues/9689
* (v0) Unified file id - allow calling multiple providers with the same file id (#9718) (a hedged usage sketch follows the changed-files summary below)
  * feat(files_endpoints.py): initial commit adding 'target_model_names' support, letting a developer specify all the models they want to call with the file
  * feat(files_endpoints.py): return unified files endpoint
  * test(test_files_endpoints.py): add validation test for when an invalid purpose is submitted
  * feat: more updates
  * feat: initial working commit of unified file id translation
  * fix: additional fixes
  * fix(router.py): remove model replace logic in jsonl on acreate_file, enabling file upload to work for chat completion requests as well
  * fix(files_endpoints.py): remove whitespace around model name
  * fix(azure/handler.py): return acreate_file with the correct response type
  * fix: fix linting errors
  * test: fix mock test to run on GitHub Actions
  * fix: fix ruff errors
  * fix: fix file-too-large error
  * fix(utils.py): remove redundant var
  * test: modify test to work on GitHub Actions
  * test: update tests
  * test: more debug logs to understand CI/CD issue
  * test: fix test for respx
  * test: skip mock respx test (fails on CI/CD, cause unclear)
  * fix: fix ruff check
  * fix: fix test
  * fix(model_connection_test.tsx): fix linting error
  * test: update unit tests
This commit is contained in:
parent 5a18eebdb6
commit 6dda1ba6dd
27 changed files with 889 additions and 96 deletions
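The unified file id work in #9718 lets a developer upload a file once through the proxy's files endpoint, list the deployments it should be prepared for via 'target_model_names', and then reuse the returned litellm_proxy-managed id in later requests; the managed-files hook shown in the diff below resolves that unified id back to a provider-specific file id at request time. The sketch below is one way a client might exercise this against the proxy: the base URL, API key, model aliases, the comma-separated target_model_names format, and the file content-part shape in the chat request are illustrative assumptions, not details confirmed by this commit.

# Hedged sketch: upload once via the LiteLLM proxy files endpoint for several
# deployments, then reuse the returned unified file id in a chat call.
# Base URL, API key, model names, and the target_model_names format are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")  # proxy address (assumed)

# Upload the file once; per the commit description, the proxy prepares it for
# every listed deployment and returns a single litellm_proxy-managed file id.
uploaded = client.files.create(
    file=open("mydata.jsonl", "rb"),
    purpose="user_data",
    extra_body={"target_model_names": "gpt-4o-mini-openai, gpt-4o-mini-azure"},  # assumed format
)
print(uploaded.id)  # unified file id

# Reference the unified id in a chat completion; the async_pre_call_hook in
# managed_files.py (below) swaps in the provider-specific file id.
completion = client.chat.completions.create(
    model="gpt-4o-mini-azure",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize this document."},
                {"type": "file", "file": {"file_id": uploaded.id}},
            ],
        }
    ],
)
print(completion.choices[0].message.content)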
litellm/proxy/hooks/managed_files.py (new file, 145 lines)
@@ -0,0 +1,145 @@
# What is this?
## This hook is used to check for LiteLLM managed files in the request body, and replace them with model-specific file id

import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Union, cast

from litellm import verbose_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
    get_file_ids_from_messages,
)
from litellm.proxy._types import CallTypes, SpecialEnums, UserAPIKeyAuth
from litellm.types.llms.openai import OpenAIFileObject, OpenAIFilesPurpose

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache

    Span = Union[_Span, Any]
    InternalUsageCache = _InternalUsageCache
else:
    Span = Any
    InternalUsageCache = Any


class _PROXY_LiteLLMManagedFiles(CustomLogger):
    # Class variables or attributes
    def __init__(self, internal_usage_cache: InternalUsageCache):
        self.internal_usage_cache = internal_usage_cache

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: Dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ) -> Union[Exception, str, Dict, None]:
        """
        - Detect litellm_proxy/ file_id
        - add dictionary of mappings of litellm_proxy/ file_id -> provider_file_id => {litellm_proxy/file_id: {"model_id": id, "file_id": provider_file_id}}
        """
        if call_type == CallTypes.completion.value:
            messages = data.get("messages")
            if messages:
                file_ids = get_file_ids_from_messages(messages)
                if file_ids:
                    model_file_id_mapping = await self.get_model_file_id_mapping(
                        file_ids, user_api_key_dict.parent_otel_span
                    )
                    data["model_file_id_mapping"] = model_file_id_mapping

        return data

    async def get_model_file_id_mapping(
        self, file_ids: List[str], litellm_parent_otel_span: Span
    ) -> dict:
        """
        Get model-specific file IDs for a list of proxy file IDs.
        Returns a dictionary mapping litellm_proxy/ file_id -> model_id -> model_file_id

        1. Get all the litellm_proxy/ file_ids from the messages
        2. For each file_id, search for cache keys matching the pattern file_id:*
        3. Return a dictionary of mappings of litellm_proxy/ file_id -> model_id -> model_file_id

        Example:
        {
            "litellm_proxy/file_id": {
                "model_id": "model_file_id"
            }
        }
        """
        file_id_mapping: Dict[str, Dict[str, str]] = {}
        litellm_managed_file_ids = []

        for file_id in file_ids:
            ## CHECK IF FILE ID IS MANAGED BY LITELM
            if file_id.startswith(SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value):
                litellm_managed_file_ids.append(file_id)

        if litellm_managed_file_ids:
            # Get all cache keys matching the pattern file_id:*
            for file_id in litellm_managed_file_ids:
                # Search for any cache key starting with this file_id
                cached_values = cast(
                    Dict[str, str],
                    await self.internal_usage_cache.async_get_cache(
                        key=file_id, litellm_parent_otel_span=litellm_parent_otel_span
                    ),
                )
                if cached_values:
                    file_id_mapping[file_id] = cached_values
        return file_id_mapping

    @staticmethod
    async def return_unified_file_id(
        file_objects: List[OpenAIFileObject],
        purpose: OpenAIFilesPurpose,
        internal_usage_cache: InternalUsageCache,
        litellm_parent_otel_span: Span,
    ) -> OpenAIFileObject:
        unified_file_id = SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value + str(
            uuid.uuid4()
        )

        ## CREATE RESPONSE OBJECT
        response = OpenAIFileObject(
            id=unified_file_id,
            object="file",
            purpose=cast(OpenAIFilesPurpose, purpose),
            created_at=file_objects[0].created_at,
            bytes=1234,
            filename=str(datetime.now().timestamp()),
            status="uploaded",
        )

        ## STORE RESPONSE IN DB + CACHE
        stored_values: Dict[str, str] = {}
        for file_object in file_objects:
            model_id = file_object._hidden_params.get("model_id")
            if model_id is None:
                verbose_logger.warning(
                    f"Skipping file_object: {file_object} because model_id in hidden_params={file_object._hidden_params} is None"
                )
                continue
            file_id = file_object.id
            stored_values[model_id] = file_id
        await internal_usage_cache.async_set_cache(
            key=unified_file_id,
            value=stored_values,
            litellm_parent_otel_span=litellm_parent_otel_span,
        )

        return response
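To make the cache contract in managed_files.py concrete: return_unified_file_id stores a {model_id: provider_file_id} dict under the unified id, and get_model_file_id_mapping reads it back when a later request references that id. Below is a minimal, self-contained sketch of that round trip; the InMemoryCache class, the prefix string, and the ids are hypothetical stand-ins for the proxy's InternalUsageCache and SpecialEnums values, not LiteLLM APIs.

# Self-contained illustration of the unified-file-id mapping pattern.
import asyncio
import uuid

LITELLM_MANAGED_PREFIX = "litellm_proxy/"  # assumption: illustrative prefix, not the real enum value


class InMemoryCache:
    """Hypothetical stand-in for the proxy's InternalUsageCache."""

    def __init__(self) -> None:
        self._store: dict = {}

    async def async_set_cache(self, key: str, value: dict) -> None:
        self._store[key] = value

    async def async_get_cache(self, key: str):
        return self._store.get(key)


async def main() -> None:
    cache = InMemoryCache()

    # 1. Upload time: one unified id is stored with per-deployment provider file ids
    #    (mirroring return_unified_file_id's stored_values).
    unified_file_id = LITELLM_MANAGED_PREFIX + str(uuid.uuid4())
    await cache.async_set_cache(
        key=unified_file_id,
        value={
            "azure-deployment-1": "assistant-abc123",  # hypothetical provider file id
            "openai-deployment-2": "file-xyz789",      # hypothetical provider file id
        },
    )

    # 2. Request time: detect managed ids and build model_file_id_mapping,
    #    mirroring get_model_file_id_mapping above.
    file_ids = [unified_file_id, "file-not-managed"]
    model_file_id_mapping: dict = {}
    for file_id in file_ids:
        if file_id.startswith(LITELLM_MANAGED_PREFIX):
            cached = await cache.async_get_cache(key=file_id)
            if cached:
                model_file_id_mapping[file_id] = cached

    print(model_file_id_mapping)
    # {'litellm_proxy/<uuid>': {'azure-deployment-1': 'assistant-abc123',
    #                           'openai-deployment-2': 'file-xyz789'}}


if __name__ == "__main__":
    asyncio.run(main())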