LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)

* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722)

* feat(new_usage.tsx): add date picker for new usage tab

allows users to look back on their usage data

* feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details

allows tracking how many reasoning tokens are actually being used

* feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response

allows tracking reasoning_token usage across providers (see the reasoning-token sketch after this list)

* Fix update team metadata + fix bulk adding models on UI (#9721)

* fix(handle_add_model_submit.tsx): fix bulk adding models

* fix(team_info.tsx): fix team metadata update

Fixes https://github.com/BerriAI/litellm/issues/9689

* (v0) Unified file id - allow calling multiple providers with same file id (#9718)

* feat(files_endpoints.py): initial commit adding 'target_model_names' support

allows developers to specify all the models they want to call with the file

* feat(files_endpoints.py): return unified files endpoint

* test(test_files_endpoints.py): add validation test - if invalid purpose submitted

* feat: more updates

* feat: initial working commit of unified file id translation

* fix: additional fixes

* fix(router.py): remove model replace logic in jsonl on acreate_file

enables file upload to work for chat completion requests as well (see the unified file id sketch after this list)

* fix(files_endpoints.py): remove whitespace around model name

* fix(azure/handler.py): return acreate_file with correct response type

* fix: fix linting errors

* test: fix mock test to run on github actions

* fix: fix ruff errors

* fix: fix file too large error

* fix(utils.py): remove redundant var

* test: modify test to work on github actions

* test: update tests

* test: more debug logs to understand ci/cd issue

* test: fix test for respx

* test: skip mock respx test

fails on ci/cd - not clear why

* fix: fix ruff check

* fix: fix test

* fix(model_connection_test.tsx): fix linting error

* test: update unit tests
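
A rough end-to-end sketch of the reasoning-token tracking described above. The model id and the `thinking` parameter are assumptions for illustration; whether `reasoning_content` is populated depends on the provider and model.

```python
# Illustrative sketch (not from the PR): stream a response, watch the
# reasoning_content deltas, then rebuild the response to read the
# reasoning token count. Model id and the `thinking` kwarg are assumptions.
import litellm

chunks = []
for chunk in litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # assumed model id
    messages=[{"role": "user", "content": "What is 17 * 24?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed kwarg
    stream=True,
):
    delta = chunk.choices[0].delta if chunk.choices else None
    # reasoning_content is streamed alongside regular content (per this PR)
    if delta is not None and getattr(delta, "reasoning_content", None):
        print("reasoning:", delta.reasoning_content)
    chunks.append(chunk)

# stream_chunk_builder stitches the chunks back into a full ModelResponse;
# with this PR its usage should carry reasoning token details.
rebuilt = litellm.stream_chunk_builder(chunks)
details = rebuilt.usage.completion_tokens_details
print("reasoning_tokens:", getattr(details, "reasoning_tokens", None))
```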
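And a sketch of the (v0) unified file id flow. The proxy URL, API key, model names, purpose, and the `target_model_names` field are illustrative assumptions based on the commit messages above, not settled documentation.

```python
# Illustrative sketch (not from the PR): upload a file once through the
# proxy, then reference the returned unified file id in a chat completion.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")  # assumed proxy

created = client.files.create(
    file=open("my_document.pdf", "rb"),
    purpose="user_data",  # assumed purpose
    # ask the proxy to register the file against every model it may route to
    extra_body={"target_model_names": "gpt-4o-mini, gemini-2.0-flash"},
)

# the proxy returns a unified id; per this PR it is translated to the
# provider-specific file id for whichever deployment handles the request
response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[{
        "role": "user",
        "content": [
            {"type": "file", "file": {"file_id": created.id}},
            {"type": "text", "text": "Summarize this document."},
        ],
    }],
)
print(response.choices[0].message.content)
```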
Krish Dholakia, 2025-04-03 11:48:52 -07:00 (committed by GitHub)
commit 6dda1ba6dd, parent 5a18eebdb6
27 changed files with 889 additions and 96 deletions


@@ -457,8 +457,12 @@ class Logging(LiteLLMLoggingBaseClass):
         non_default_params: dict,
         prompt_id: str,
         prompt_variables: Optional[dict],
+        prompt_management_logger: Optional[CustomLogger] = None,
     ) -> Tuple[str, List[AllMessageValues], dict]:
-        custom_logger = self.get_custom_logger_for_prompt_management(model)
+        custom_logger = (
+            prompt_management_logger
+            or self.get_custom_logger_for_prompt_management(model)
+        )
         if custom_logger:
             (
                 model,


@@ -7,6 +7,7 @@ from typing import Dict, List, Literal, Optional, Union, cast
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionFileObject,
     ChatCompletionUserMessage,
 )
 from litellm.types.utils import Choices, ModelResponse, StreamingChoices
@@ -292,3 +293,58 @@ def get_completion_messages(
         messages, assistant_continue_message, ensure_alternating_roles
     )
     return messages
+
+
+def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]:
+    """
+    Gets file ids from messages
+    """
+    file_ids = []
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            file_ids.append(file_id)
+    return file_ids
+
+
+def update_messages_with_model_file_ids(
+    messages: List[AllMessageValues],
+    model_id: str,
+    model_file_id_mapping: Dict[str, Dict[str, str]],
+) -> List[AllMessageValues]:
+    """
+    Updates messages with model file ids.
+
+    model_file_id_mapping: Dict[str, Dict[str, str]] = {
+        "litellm_proxy/file_id": {
+            "model_id": "provider_file_id"
+        }
+    }
+    """
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            provider_file_id = (
+                                model_file_id_mapping.get(file_id, {}).get(model_id)
+                                or file_id
+                            )
+                            file_object_file_field["file_id"] = provider_file_id
+    return messages
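
A brief usage sketch for the two helpers added above. The file ids and model id below are made up for illustration, and the sketch assumes both functions are in scope (their module path is not shown here).

```python
# Hypothetical ids, purely to illustrate the mapping shape.
messages = [{
    "role": "user",
    "content": [
        {"type": "file", "file": {"file_id": "litellm_proxy/file-abc123"}},
        {"type": "text", "text": "Summarize this file."},
    ],
}]

print(get_file_ids_from_messages(messages))
# -> ['litellm_proxy/file-abc123']

# swap the unified id for the provider-specific id of the chosen deployment
model_file_id_mapping = {
    "litellm_proxy/file-abc123": {"model-deployment-1": "provider-file-789"}
}
updated = update_messages_with_model_file_ids(
    messages,
    model_id="model-deployment-1",
    model_file_id_mapping=model_file_id_mapping,
)
print(updated[0]["content"][0]["file"]["file_id"])  # -> provider-file-789
```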


@@ -1,6 +1,6 @@
 import base64
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, cast
 
 from litellm.types.llms.openai import (
     ChatCompletionAssistantContentValue,
@@ -9,7 +9,9 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import (
     ChatCompletionAudioResponse,
     ChatCompletionMessageToolCall,
+    Choices,
     CompletionTokensDetails,
+    CompletionTokensDetailsWrapper,
     Function,
     FunctionCall,
     ModelResponse,
@@ -203,14 +205,14 @@ class ChunkProcessor:
         )
 
     def get_combined_content(
-        self, chunks: List[Dict[str, Any]]
+        self, chunks: List[Dict[str, Any]], delta_key: str = "content"
     ) -> ChatCompletionAssistantContentValue:
         content_list: List[str] = []
         for chunk in chunks:
             choices = chunk["choices"]
             for choice in choices:
                 delta = choice.get("delta", {})
-                content = delta.get("content", "")
+                content = delta.get(delta_key, "")
                 if content is None:
                     continue  # openai v1.0.0 sets content = None for chunks
                 content_list.append(content)
@@ -221,6 +223,11 @@ class ChunkProcessor:
         # Update the "content" field within the response dictionary
         return combined_content
 
+    def get_combined_reasoning_content(
+        self, chunks: List[Dict[str, Any]]
+    ) -> ChatCompletionAssistantContentValue:
+        return self.get_combined_content(chunks, delta_key="reasoning_content")
+
     def get_combined_audio_content(
         self, chunks: List[Dict[str, Any]]
     ) -> ChatCompletionAudioResponse:
@@ -296,12 +303,27 @@ class ChunkProcessor:
             "prompt_tokens_details": prompt_tokens_details,
         }
 
+    def count_reasoning_tokens(self, response: ModelResponse) -> int:
+        reasoning_tokens = 0
+        for choice in response.choices:
+            if (
+                hasattr(cast(Choices, choice).message, "reasoning_content")
+                and cast(Choices, choice).message.reasoning_content is not None
+            ):
+                reasoning_tokens += token_counter(
+                    text=cast(Choices, choice).message.reasoning_content,
+                    count_response_tokens=True,
+                )
+
+        return reasoning_tokens
+
     def calculate_usage(
         self,
         chunks: List[Union[Dict[str, Any], ModelResponse]],
         model: str,
         completion_output: str,
         messages: Optional[List] = None,
+        reasoning_tokens: Optional[int] = None,
     ) -> Usage:
         """
         Calculate usage for the given chunks.
@@ -382,6 +404,19 @@ class ChunkProcessor:
         )  # for anthropic
         if completion_tokens_details is not None:
             returned_usage.completion_tokens_details = completion_tokens_details
+
+        if reasoning_tokens is not None:
+            if returned_usage.completion_tokens_details is None:
+                returned_usage.completion_tokens_details = (
+                    CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens)
+                )
+            elif (
+                returned_usage.completion_tokens_details is not None
+                and returned_usage.completion_tokens_details.reasoning_tokens is None
+            ):
+                returned_usage.completion_tokens_details.reasoning_tokens = (
+                    reasoning_tokens
+                )
 
         if prompt_tokens_details is not None:
             returned_usage.prompt_tokens_details = prompt_tokens_details
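
The net effect of the branch above, sketched with hand-constructed objects. Token counts are made up, and this mirrors, rather than reproduces, the PR's code path: the counted reasoning tokens only fill in `completion_tokens_details` when the provider did not already report them.

```python
# Illustrative sketch (not from the PR): merge a reasoning token count into
# an existing Usage object the same way the added branch does.
from litellm.types.utils import CompletionTokensDetailsWrapper, Usage

usage = Usage(prompt_tokens=50, completion_tokens=120, total_tokens=170)
reasoning_tokens = 42  # e.g. from ChunkProcessor.count_reasoning_tokens(...)

if usage.completion_tokens_details is None:
    usage.completion_tokens_details = CompletionTokensDetailsWrapper(
        reasoning_tokens=reasoning_tokens
    )
elif usage.completion_tokens_details.reasoning_tokens is None:
    usage.completion_tokens_details.reasoning_tokens = reasoning_tokens

print(usage.completion_tokens_details.reasoning_tokens)  # -> 42
```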