LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)

* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722)

* feat(new_usage.tsx): add date picker for new usage tab

allows users to look back on their usage data

* feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details

allows tracking how many reasoning tokens are actually being used

* feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response

allows tracking reasoning_token usage across providers (see the reasoning-token sketch after this list)

* Fix update team metadata + fix bulk adding models on UI (#9721)

* fix(handle_add_model_submit.tsx): fix bulk adding models

* fix(team_info.tsx): fix team metadata update

Fixes https://github.com/BerriAI/litellm/issues/9689

* (v0) Unified file id - allow calling multiple providers with same file id (#9718)

* feat(files_endpoints.py): initial commit adding 'target_model_names' support

allows developers to specify all the models they want to call with the file

* feat(files_endpoints.py): return unified files endpoint

* test(test_files_endpoints.py): add validation test - if invalid purpose submitted

* feat: more updates

* feat: initial working commit of unified file id translation

* fix: additional fixes

* fix(router.py): remove model replace logic in jsonl on acreate_file

enables file upload to work for chat completion requests as well (see the unified file id sketch after this list)

* fix(files_endpoints.py): remove whitespace around model name

* fix(azure/handler.py): return acreate_file with correct response type

* fix: fix linting errors

* test: fix mock test to run on github actions

* fix: fix ruff errors

* fix: fix file too large error

* fix(utils.py): remove redundant var

* test: modify test to work on github actions

* test: update tests

* test: more debug logs to understand ci/cd issue

* test: fix test for respx

* test: skip mock respx test

fails on ci/cd - not clear why

* fix: fix ruff check

* fix: fix test

* fix(model_connection_test.tsx): fix linting error

* test: update unit tests
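
A rough end-to-end sketch of the reasoning-token tracking described above. The model id and the `thinking` parameter are assumptions for illustration; whether `reasoning_content` is populated depends on the provider and model.

```python
# Illustrative sketch (not from the PR): stream a response, watch the
# reasoning_content deltas, then rebuild the response to read the
# reasoning token count. Model id and the `thinking` kwarg are assumptions.
import litellm

chunks = []
for chunk in litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # assumed model id
    messages=[{"role": "user", "content": "What is 17 * 24?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed kwarg
    stream=True,
):
    delta = chunk.choices[0].delta if chunk.choices else None
    # reasoning_content is streamed alongside regular content (per this PR)
    if delta is not None and getattr(delta, "reasoning_content", None):
        print("reasoning:", delta.reasoning_content)
    chunks.append(chunk)

# stream_chunk_builder stitches the chunks back into a full ModelResponse;
# with this PR its usage should carry reasoning token details.
rebuilt = litellm.stream_chunk_builder(chunks)
details = rebuilt.usage.completion_tokens_details
print("reasoning_tokens:", getattr(details, "reasoning_tokens", None))
```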
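And a sketch of the (v0) unified file id flow. The proxy URL, API key, model names, purpose, and the `target_model_names` field are illustrative assumptions based on the commit messages above, not settled documentation.

```python
# Illustrative sketch (not from the PR): upload a file once through the
# proxy, then reference the returned unified file id in a chat completion.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")  # assumed proxy

created = client.files.create(
    file=open("my_document.pdf", "rb"),
    purpose="user_data",  # assumed purpose
    # ask the proxy to register the file against every model it may route to
    extra_body={"target_model_names": "gpt-4o-mini, gemini-2.0-flash"},
)

# the proxy returns a unified id; per this PR it is translated to the
# provider-specific file id for whichever deployment handles the request
response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[{
        "role": "user",
        "content": [
            {"type": "file", "file": {"file_id": created.id}},
            {"type": "text", "text": "Summarize this document."},
        ],
    }],
)
print(response.choices[0].message.content)
```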
Krish Dholakia, 2025-04-03 11:48:52 -07:00 (committed by GitHub)
commit 6dda1ba6dd, parent 5a18eebdb6
27 changed files with 889 additions and 96 deletions


@@ -457,8 +457,12 @@ class Logging(LiteLLMLoggingBaseClass):
         non_default_params: dict,
         prompt_id: str,
         prompt_variables: Optional[dict],
+        prompt_management_logger: Optional[CustomLogger] = None,
     ) -> Tuple[str, List[AllMessageValues], dict]:
-        custom_logger = self.get_custom_logger_for_prompt_management(model)
+        custom_logger = (
+            prompt_management_logger
+            or self.get_custom_logger_for_prompt_management(model)
+        )
         if custom_logger:
             (
                 model,


@@ -7,6 +7,7 @@ from typing import Dict, List, Literal, Optional, Union, cast
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionFileObject,
     ChatCompletionUserMessage,
 )
 from litellm.types.utils import Choices, ModelResponse, StreamingChoices
@@ -292,3 +293,58 @@ def get_completion_messages(
         messages, assistant_continue_message, ensure_alternating_roles
     )
     return messages
+
+
+def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]:
+    """
+    Gets file ids from messages
+    """
+    file_ids = []
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            file_ids.append(file_id)
+    return file_ids
+
+
+def update_messages_with_model_file_ids(
+    messages: List[AllMessageValues],
+    model_id: str,
+    model_file_id_mapping: Dict[str, Dict[str, str]],
+) -> List[AllMessageValues]:
+    """
+    Updates messages with model file ids.
+
+    model_file_id_mapping: Dict[str, Dict[str, str]] = {
+        "litellm_proxy/file_id": {
+            "model_id": "provider_file_id"
+        }
+    }
+    """
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            provider_file_id = (
+                                model_file_id_mapping.get(file_id, {}).get(model_id)
+                                or file_id
+                            )
+                            file_object_file_field["file_id"] = provider_file_id
+    return messages
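
A brief usage sketch for the two helpers added above. The file ids and model id below are made up for illustration, and the sketch assumes both functions are in scope (their module path is not shown here).

```python
# Hypothetical ids, purely to illustrate the mapping shape.
messages = [{
    "role": "user",
    "content": [
        {"type": "file", "file": {"file_id": "litellm_proxy/file-abc123"}},
        {"type": "text", "text": "Summarize this file."},
    ],
}]

print(get_file_ids_from_messages(messages))
# -> ['litellm_proxy/file-abc123']

# swap the unified id for the provider-specific id of the chosen deployment
model_file_id_mapping = {
    "litellm_proxy/file-abc123": {"model-deployment-1": "provider-file-789"}
}
updated = update_messages_with_model_file_ids(
    messages,
    model_id="model-deployment-1",
    model_file_id_mapping=model_file_id_mapping,
)
print(updated[0]["content"][0]["file"]["file_id"])  # -> provider-file-789
```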


@@ -1,6 +1,6 @@
 import base64
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, cast
 
 from litellm.types.llms.openai import (
     ChatCompletionAssistantContentValue,
@@ -9,7 +9,9 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import (
     ChatCompletionAudioResponse,
     ChatCompletionMessageToolCall,
+    Choices,
     CompletionTokensDetails,
+    CompletionTokensDetailsWrapper,
     Function,
     FunctionCall,
     ModelResponse,
@@ -203,14 +205,14 @@ class ChunkProcessor:
         )
 
     def get_combined_content(
-        self, chunks: List[Dict[str, Any]]
+        self, chunks: List[Dict[str, Any]], delta_key: str = "content"
     ) -> ChatCompletionAssistantContentValue:
         content_list: List[str] = []
         for chunk in chunks:
             choices = chunk["choices"]
             for choice in choices:
                 delta = choice.get("delta", {})
-                content = delta.get("content", "")
+                content = delta.get(delta_key, "")
                 if content is None:
                     continue  # openai v1.0.0 sets content = None for chunks
                 content_list.append(content)
@@ -221,6 +223,11 @@ class ChunkProcessor:
         # Update the "content" field within the response dictionary
         return combined_content
 
+    def get_combined_reasoning_content(
+        self, chunks: List[Dict[str, Any]]
+    ) -> ChatCompletionAssistantContentValue:
+        return self.get_combined_content(chunks, delta_key="reasoning_content")
+
     def get_combined_audio_content(
         self, chunks: List[Dict[str, Any]]
     ) -> ChatCompletionAudioResponse:
@@ -296,12 +303,27 @@ class ChunkProcessor:
             "prompt_tokens_details": prompt_tokens_details,
         }
 
+    def count_reasoning_tokens(self, response: ModelResponse) -> int:
+        reasoning_tokens = 0
+        for choice in response.choices:
+            if (
+                hasattr(cast(Choices, choice).message, "reasoning_content")
+                and cast(Choices, choice).message.reasoning_content is not None
+            ):
+                reasoning_tokens += token_counter(
+                    text=cast(Choices, choice).message.reasoning_content,
+                    count_response_tokens=True,
+                )
+
+        return reasoning_tokens
+
     def calculate_usage(
         self,
         chunks: List[Union[Dict[str, Any], ModelResponse]],
         model: str,
         completion_output: str,
         messages: Optional[List] = None,
+        reasoning_tokens: Optional[int] = None,
     ) -> Usage:
         """
         Calculate usage for the given chunks.
@@ -382,6 +404,19 @@ class ChunkProcessor:
         )  # for anthropic
         if completion_tokens_details is not None:
             returned_usage.completion_tokens_details = completion_tokens_details
+
+        if reasoning_tokens is not None:
+            if returned_usage.completion_tokens_details is None:
+                returned_usage.completion_tokens_details = (
+                    CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens)
+                )
+            elif (
+                returned_usage.completion_tokens_details is not None
+                and returned_usage.completion_tokens_details.reasoning_tokens is None
+            ):
+                returned_usage.completion_tokens_details.reasoning_tokens = (
+                    reasoning_tokens
+                )
 
         if prompt_tokens_details is not None:
             returned_usage.prompt_tokens_details = prompt_tokens_details
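
The net effect of the branch above, sketched with hand-constructed objects. Token counts are made up, and this mirrors, rather than reproduces, the PR's code path: the counted reasoning tokens only fill in `completion_tokens_details` when the provider did not already report them.

```python
# Illustrative sketch (not from the PR): merge a reasoning token count into
# an existing Usage object the same way the added branch does.
from litellm.types.utils import CompletionTokensDetailsWrapper, Usage

usage = Usage(prompt_tokens=50, completion_tokens=120, total_tokens=170)
reasoning_tokens = 42  # e.g. from ChunkProcessor.count_reasoning_tokens(...)

if usage.completion_tokens_details is None:
    usage.completion_tokens_details = CompletionTokensDetailsWrapper(
        reasoning_tokens=reasoning_tokens
    )
elif usage.completion_tokens_details.reasoning_tokens is None:
    usage.completion_tokens_details.reasoning_tokens = reasoning_tokens

print(usage.completion_tokens_details.reasoning_tokens)  # -> 42
```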