Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 03:34:10 +00:00
LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)
* Add date picker to usage tab + add reasoning_content token tracking across all providers on streaming (#9722)
  * feat(new_usage.tsx): add date picker for new usage tab, allowing users to look back on their usage data
  * feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details, allowing usage tracking of how many reasoning tokens are actually being used
  * feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response, allowing reasoning_token usage to be tracked across providers
* Fix update team metadata + fix bulk adding models on UI (#9721)
  * fix(handle_add_model_submit.tsx): fix bulk adding models
  * fix(team_info.tsx): fix team metadata update (fixes https://github.com/BerriAI/litellm/issues/9689)
* (v0) Unified file id - allow calling multiple providers with the same file id (#9718)
  * feat(files_endpoints.py): initial commit adding 'target_model_names' support, letting the developer specify all the models they want to call with the file
  * feat(files_endpoints.py): return unified files endpoint
  * test(test_files_endpoints.py): add validation test for an invalid purpose being submitted
  * feat: more updates
  * feat: initial working commit of unified file id translation
  * fix: additional fixes
  * fix(router.py): remove model replace logic in jsonl on acreate_file, enabling file upload to work for chat completion requests as well
  * fix(files_endpoints.py): remove whitespace around model name
  * fix(azure/handler.py): return acreate_file with correct response type
  * fix: fix linting errors
  * test: fix mock test to run on GitHub Actions
  * fix: fix ruff errors
  * fix: fix "file too large" error
  * fix(utils.py): remove redundant var
  * test: modify test to work on GitHub Actions
  * test: update tests
  * test: add more debug logs to understand CI/CD issue
  * test: fix test for respx
  * test: skip mock respx test (fails on CI/CD, not clear why)
  * fix: fix ruff check
  * fix: fix test
  * fix(model_connection_test.tsx): fix linting error
  * test: update unit tests
Parent: 5a18eebdb6
Commit: 6dda1ba6dd
27 changed files with 889 additions and 96 deletions
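Before the diff, a hedged sketch of the unified file id flow described in the commit message (#9718): upload a file once, declaring the target models, then reuse the returned id in chat completions against any of them while the proxy swaps in the provider-specific file id. The proxy URL, API key, file name, model names, and the placement of target_model_names in extra_body are illustrative assumptions, not the verified request shape.

# Hypothetical usage sketch only; none of these values come from the diff.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

# Upload once, declaring every model that should be able to use the file.
uploaded = client.files.create(
    file=open("my_doc.pdf", "rb"),
    purpose="user_data",
    extra_body={"target_model_names": "gpt-4o-mini, gemini-2.0-flash"},  # assumed field placement
)

# The returned unified id can be sent to any of the target models; the proxy
# translates it to the provider-specific file id before dispatch.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize this file."},
                {"type": "file", "file": {"file_id": uploaded.id}},
            ],
        }
    ],
)
print(response.choices[0].message.content)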
@@ -457,8 +457,12 @@ class Logging(LiteLLMLoggingBaseClass):
         non_default_params: dict,
         prompt_id: str,
         prompt_variables: Optional[dict],
+        prompt_management_logger: Optional[CustomLogger] = None,
     ) -> Tuple[str, List[AllMessageValues], dict]:
-        custom_logger = self.get_custom_logger_for_prompt_management(model)
+        custom_logger = (
+            prompt_management_logger
+            or self.get_custom_logger_for_prompt_management(model)
+        )
         if custom_logger:
             (
                 model,
@@ -7,6 +7,7 @@ from typing import Dict, List, Literal, Optional, Union, cast
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionAssistantMessage,
+    ChatCompletionFileObject,
     ChatCompletionUserMessage,
 )
 from litellm.types.utils import Choices, ModelResponse, StreamingChoices
@@ -292,3 +293,58 @@ def get_completion_messages(
         messages, assistant_continue_message, ensure_alternating_roles
     )
     return messages
+
+
+def get_file_ids_from_messages(messages: List[AllMessageValues]) -> List[str]:
+    """
+    Gets file ids from messages
+    """
+    file_ids = []
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            file_ids.append(file_id)
+    return file_ids
+
+
+def update_messages_with_model_file_ids(
+    messages: List[AllMessageValues],
+    model_id: str,
+    model_file_id_mapping: Dict[str, Dict[str, str]],
+) -> List[AllMessageValues]:
+    """
+    Updates messages with model file ids.
+
+    model_file_id_mapping: Dict[str, Dict[str, str]] = {
+        "litellm_proxy/file_id": {
+            "model_id": "provider_file_id"
+        }
+    }
+    """
+    for message in messages:
+        if message.get("role") == "user":
+            content = message.get("content")
+            if content:
+                if isinstance(content, str):
+                    continue
+                for c in content:
+                    if c["type"] == "file":
+                        file_object = cast(ChatCompletionFileObject, c)
+                        file_object_file_field = file_object["file"]
+                        file_id = file_object_file_field.get("file_id")
+                        if file_id:
+                            provider_file_id = (
+                                model_file_id_mapping.get(file_id, {}).get(model_id)
+                                or file_id
+                            )
+                            file_object_file_field["file_id"] = provider_file_id
+    return messages
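A minimal usage sketch of the two helpers added in the hunk above, assuming both functions are in scope (imported from the module this hunk modifies); the file id, deployment id, and provider file id are made up.

# Illustrative only: assumes get_file_ids_from_messages and
# update_messages_with_model_file_ids from the hunk above are importable here.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize this document."},
            {"type": "file", "file": {"file_id": "litellm_proxy/file-abc123"}},
        ],
    }
]

# Collect the unified file ids referenced by the request.
assert get_file_ids_from_messages(messages) == ["litellm_proxy/file-abc123"]

# Swap each unified id for the provider-specific id of the chosen deployment.
updated = update_messages_with_model_file_ids(
    messages=messages,
    model_id="my-deployment-id",
    model_file_id_mapping={
        "litellm_proxy/file-abc123": {"my-deployment-id": "provider-file-xyz789"}
    },
)
assert updated[0]["content"][1]["file"]["file_id"] == "provider-file-xyz789"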
@@ -1,6 +1,6 @@
 import base64
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, cast
 
 from litellm.types.llms.openai import (
     ChatCompletionAssistantContentValue,
@@ -9,7 +9,9 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import (
     ChatCompletionAudioResponse,
     ChatCompletionMessageToolCall,
+    Choices,
     CompletionTokensDetails,
+    CompletionTokensDetailsWrapper,
     Function,
     FunctionCall,
     ModelResponse,
@@ -203,14 +205,14 @@ class ChunkProcessor:
         )
 
     def get_combined_content(
-        self, chunks: List[Dict[str, Any]]
+        self, chunks: List[Dict[str, Any]], delta_key: str = "content"
     ) -> ChatCompletionAssistantContentValue:
         content_list: List[str] = []
         for chunk in chunks:
             choices = chunk["choices"]
             for choice in choices:
                 delta = choice.get("delta", {})
-                content = delta.get("content", "")
+                content = delta.get(delta_key, "")
                 if content is None:
                     continue  # openai v1.0.0 sets content = None for chunks
                 content_list.append(content)
@@ -221,6 +223,11 @@ class ChunkProcessor:
         # Update the "content" field within the response dictionary
         return combined_content
 
+    def get_combined_reasoning_content(
+        self, chunks: List[Dict[str, Any]]
+    ) -> ChatCompletionAssistantContentValue:
+        return self.get_combined_content(chunks, delta_key="reasoning_content")
+
     def get_combined_audio_content(
         self, chunks: List[Dict[str, Any]]
     ) -> ChatCompletionAudioResponse:
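A minimal sketch of what the new delta_key parameter and get_combined_reasoning_content do, using hand-written chunks in the OpenAI streaming delta shape; the text is made up and `processor` stands for an existing ChunkProcessor instance whose construction is not shown in this diff.

# Hand-written chunks, illustrative only (OpenAI streaming delta shape).
chunks = [
    {"choices": [{"delta": {"content": "The answer", "reasoning_content": "User asks"}}]},
    {"choices": [{"delta": {"content": " is 42.", "reasoning_content": " a question."}}]},
    {"choices": [{"delta": {"content": None, "reasoning_content": None}}]},
]

# processor: an existing ChunkProcessor instance (construction not shown here).
combined = processor.get_combined_content(chunks)             # expected: "The answer is 42."
reasoning = processor.get_combined_reasoning_content(chunks)  # expected: "User asks a question."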
@@ -296,12 +303,27 @@ class ChunkProcessor:
             "prompt_tokens_details": prompt_tokens_details,
         }
 
+    def count_reasoning_tokens(self, response: ModelResponse) -> int:
+        reasoning_tokens = 0
+        for choice in response.choices:
+            if (
+                hasattr(cast(Choices, choice).message, "reasoning_content")
+                and cast(Choices, choice).message.reasoning_content is not None
+            ):
+                reasoning_tokens += token_counter(
+                    text=cast(Choices, choice).message.reasoning_content,
+                    count_response_tokens=True,
+                )
+
+        return reasoning_tokens
+
     def calculate_usage(
         self,
         chunks: List[Union[Dict[str, Any], ModelResponse]],
         model: str,
         completion_output: str,
         messages: Optional[List] = None,
+        reasoning_tokens: Optional[int] = None,
     ) -> Usage:
         """
         Calculate usage for the given chunks.
@@ -382,6 +404,19 @@ class ChunkProcessor:
             )  # for anthropic
         if completion_tokens_details is not None:
             returned_usage.completion_tokens_details = completion_tokens_details
+
+        if reasoning_tokens is not None:
+            if returned_usage.completion_tokens_details is None:
+                returned_usage.completion_tokens_details = (
+                    CompletionTokensDetailsWrapper(reasoning_tokens=reasoning_tokens)
+                )
+            elif (
+                returned_usage.completion_tokens_details is not None
+                and returned_usage.completion_tokens_details.reasoning_tokens is None
+            ):
+                returned_usage.completion_tokens_details.reasoning_tokens = (
+                    reasoning_tokens
+                )
         if prompt_tokens_details is not None:
             returned_usage.prompt_tokens_details = prompt_tokens_details
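Taken together, these hunks count reasoning text when a streamed response is rebuilt and surface it under completion_tokens_details. A hedged end-to-end sketch, assuming an Anthropic key is configured and that the model name and reasoning_effort parameter shown here are still current:

# Hedged sketch: model name, reasoning_effort value, and the printed field are
# illustrative; requires ANTHROPIC_API_KEY in the environment.
import litellm

stream = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
    reasoning_effort="low",
    stream=True,
)
chunks = [chunk for chunk in stream]

# Rebuild the full response from the stream; usage now carries reasoning tokens.
rebuilt = litellm.stream_chunk_builder(chunks)
print(rebuilt.usage.completion_tokens_details.reasoning_tokens)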