LiteLLM Minor Fixes & Improvements (04/02/2025) (#9725)

* Add date picker to usage tab + Add reasoning_content token tracking across all providers on streaming (#9722)

* feat(new_usage.tsx): add date picker for new usage tab

allows users to look back at their usage data

* feat(anthropic/chat/transformation.py): report reasoning tokens in completion token details

allows tracking how many reasoning tokens are actually being used

* feat(streaming_chunk_builder.py): return reasoning_tokens in anthropic/openai streaming response

allows tracking reasoning_token usage across providers; see the usage sketch below
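
A minimal sketch (not from this PR) of consuming the new reasoning-token reporting, assuming a reasoning-capable Anthropic model and that `reasoning_effort` and `stream_options` apply to it; the model name and parameters here are illustrative:

```python
import litellm

# Stream a completion and read reasoning-token usage from the final chunk.
# stream_options.include_usage asks for a trailing usage chunk.
response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # illustrative model name
    messages=[{"role": "user", "content": "What is 17 * 24?"}],
    reasoning_effort="low",  # illustrative; enables reasoning output
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in response:
    usage = getattr(chunk, "usage", None)
    if usage is not None and usage.completion_tokens_details is not None:
        print("reasoning tokens:", usage.completion_tokens_details.reasoning_tokens)
```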

* Fix team metadata update + fix bulk adding models on UI (#9721)

* fix(handle_add_model_submit.tsx): fix bulk adding models

* fix(team_info.tsx): fix team metadata update

Fixes https://github.com/BerriAI/litellm/issues/9689

* (v0) Unified file id - allow calling multiple providers with same file id (#9718)

* feat(files_endpoints.py): initial commit adding 'target_model_names' support

allows developers to specify all the models they want to call with the file; see the upload sketch below
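
A hedged sketch of the upload side, assuming the proxy runs at localhost:4000 and that `target_model_names` is passed as a comma-separated string via `extra_body`; the exact request shape may differ:

```python
from openai import OpenAI

# Point the standard OpenAI client at the LiteLLM proxy (illustrative URL/key).
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

# Upload once; name every model the file should be callable with.
uploaded = client.files.create(
    file=open("data.jsonl", "rb"),
    purpose="user_data",
    extra_body={"target_model_names": "gpt-4o-mini, gemini-2.0-flash"},
)
print(uploaded.id)  # unified file id, valid across the target models
```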

* feat(files_endpoints.py): return unified files endpoint

* test(test_files_endpoints.py): add validation test for when an invalid purpose is submitted

* feat: more updates

* feat: initial working commit of unified file id translation

* fix: additional fixes

* fix(router.py): remove model replace logic in jsonl on acreate_file

enables file upload to work for chat completion requests as well; see the chat completion sketch below
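
And the chat completion side, reusing `client` and `uploaded` from the sketch above and assuming the OpenAI `file` content-part format:

```python
# Reference the unified file id from a chat completion request.
completion = client.chat.completions.create(
    model="gpt-4o-mini",  # any model listed in target_model_names
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "file", "file": {"file_id": uploaded.id}},
                {"type": "text", "text": "Summarize this file."},
            ],
        }
    ],
)
print(completion.choices[0].message.content)
```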

* fix(files_endpoints.py): remove whitespace around model name

* fix(azure/handler.py): return acreate_file with correct response type

* fix: fix linting errors

* test: fix mock test to run on github actions

* fix: fix ruff errors

* fix: fix file too large error

* fix(utils.py): remove redundant var

* test: modify test to work on github actions

* test: update tests

* test: more debug logs to understand ci/cd issue

* test: fix test for respx

* test: skip mock respx test

fails on ci/cd - not clear why

* fix: fix ruff check

* fix: fix test

* fix(model_connection_test.tsx): fix linting error

* test: update unit tests
Author: Krish Dholakia
Date: 2025-04-03 11:48:52 -07:00 (committed by GitHub)
parent 5a18eebdb6
commit 6dda1ba6dd
27 changed files with 889 additions and 96 deletions

@@ -110,7 +110,10 @@ from .litellm_core_utils.fallback_utils import (
     async_completion_with_fallbacks,
     completion_with_fallbacks,
 )
-from .litellm_core_utils.prompt_templates.common_utils import get_completion_messages
+from .litellm_core_utils.prompt_templates.common_utils import (
+    get_completion_messages,
+    update_messages_with_model_file_ids,
+)
 from .litellm_core_utils.prompt_templates.factory import (
     custom_prompt,
     function_call_prompt,
@@ -953,7 +956,6 @@ def completion(  # type: ignore # noqa: PLR0915
     non_default_params = get_non_default_completion_params(kwargs=kwargs)
     litellm_params = {}  # used to prevent unbound var errors
     ## PROMPT MANAGEMENT HOOKS ##
-
     if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None:
         (
             model,
@@ -1068,6 +1070,15 @@
         if eos_token:
             custom_prompt_dict[model]["eos_token"] = eos_token
 
+    if kwargs.get("model_file_id_mapping"):
+        messages = update_messages_with_model_file_ids(
+            messages=messages,
+            model_id=kwargs.get("model_info", {}).get("id", None),
+            model_file_id_mapping=cast(
+                Dict[str, Dict[str, str]], kwargs.get("model_file_id_mapping")
+            ),
+        )
+
     provider_config: Optional[BaseConfig] = None
     if custom_llm_provider is not None and custom_llm_provider in [
         provider.value for provider in LlmProviders
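
For orientation: the cast above implies the mapping shape {unified_file_id: {model_id: provider_file_id}}. Below is a hypothetical reimplementation of the substitution step, not the actual update_messages_with_model_file_ids helper:

```python
from typing import Dict, List

def swap_file_ids_sketch(
    messages: List[dict],
    model_id: str,
    model_file_id_mapping: Dict[str, Dict[str, str]],
) -> List[dict]:
    """Illustrative only: replace unified file ids in 'file' content parts
    with the provider-specific id registered for this deployment."""
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            continue
        for part in content:
            if isinstance(part, dict) and part.get("type") == "file":
                unified_id = part["file"]["file_id"]
                per_model = model_file_id_mapping.get(unified_id, {})
                # Fall back to the unified id if no mapping exists for this model.
                part["file"]["file_id"] = per_model.get(model_id, unified_id)
    return messages
```
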
@@ -5799,6 +5810,19 @@ def stream_chunk_builder(  # noqa: PLR0915
             "content"
         ] = processor.get_combined_content(content_chunks)
 
+    reasoning_chunks = [
+        chunk
+        for chunk in chunks
+        if len(chunk["choices"]) > 0
+        and "reasoning_content" in chunk["choices"][0]["delta"]
+        and chunk["choices"][0]["delta"]["reasoning_content"] is not None
+    ]
+
+    if len(reasoning_chunks) > 0:
+        response["choices"][0]["message"][
+            "reasoning_content"
+        ] = processor.get_combined_reasoning_content(reasoning_chunks)
+
     audio_chunks = [
         chunk
         for chunk in chunks
@@ -5813,11 +5837,14 @@ def stream_chunk_builder(  # noqa: PLR0915
     completion_output = get_content_from_model_response(response)
 
+    reasoning_tokens = processor.count_reasoning_tokens(response)
+
     usage = processor.calculate_usage(
         chunks=chunks,
         model=model,
         completion_output=completion_output,
         messages=messages,
+        reasoning_tokens=reasoning_tokens,
     )
     setattr(response, "usage", usage)
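
A self-contained sketch of the merging behavior these hunks add: reasoning_content deltas are concatenated into the final message the same way content deltas are, and the count feeds the new reasoning_tokens usage field. The chunk dicts are hand-made stand-ins:

```python
# Hand-made stand-ins for streaming chunks (shape only, not real responses).
chunks = [
    {"choices": [{"delta": {"reasoning_content": "Let me multiply"}}]},
    {"choices": [{"delta": {"reasoning_content": " step by step."}}]},
    {"choices": [{"delta": {"content": "408"}}]},
]

# Combine reasoning deltas, mirroring get_combined_reasoning_content.
reasoning = "".join(
    c["choices"][0]["delta"].get("reasoning_content") or ""
    for c in chunks
    if c["choices"]
)
content = "".join(
    c["choices"][0]["delta"].get("content") or "" for c in chunks if c["choices"]
)
print(reasoning)  # Let me multiply step by step.
print(content)    # 408
```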