LiteLLM Minor Fixes & Improvements (2024/16/01) (#7826)

* fix(lm_studio/chat/transformation.py): Fix https://github.com/BerriAI/litellm/issues/7811

* fix(router.py): fix mock timeout check

* fix: drop model name from fallback args since it causes a conflict with the model=model that is provided later on. (#7806)

This error happens if you provide multiple fallback models to the completion function, each with its own model name defined.
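
A minimal sketch of the conflict, assuming fallback entries are plain kwarg dicts and the wrapper re-invokes the completion function with an explicit model argument (completion_fn and run_fallback are illustrative names, not litellm internals):

    def run_fallback(completion_fn, model: str, fallback_kwargs: dict, **kwargs):
        # If fallback_kwargs still carries its own "model" key, the call below
        # raises: TypeError: got multiple values for keyword argument 'model'.
        # Dropping it lets the explicit model=model win.
        fallback_kwargs = {k: v for k, v in fallback_kwargs.items() if k != "model"}
        return completion_fn(model=model, **fallback_kwargs, **kwargs)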

* fix(router.py): remove mock_timeout from kwargs before sending the request

prevents the mock timeout from being reused in fallback calls
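
A minimal sketch of the idea, assuming per-call kwargs are reused when the router falls back to the next deployment (the function and client names here are illustrative, not the actual Router internals):

    import asyncio

    async def call_deployment(client, **kwargs):
        # Pop mock_timeout so the simulated timeout only applies to this attempt;
        # if it stayed in kwargs, every fallback attempt would also time out.
        mock_timeout = kwargs.pop("mock_timeout", None)
        if mock_timeout is not None:
            await asyncio.sleep(kwargs.get("timeout", 0))
            raise TimeoutError("mock timeout triggered")
        return await client.chat.completions.create(**kwargs)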

* test: update test

* test: revert test change - wrong pr

---------

Co-authored-by: Dudu Lasry <david1542@users.noreply.github.com>
Krish Dholakia, 2025-01-17 20:59:21 -08:00, committed by GitHub
commit 1bea338597 (parent a2762fb273)
7 changed files with 40 additions and 9 deletions

litellm/router.py

@@ -810,6 +810,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["stream"] = stream
         kwargs["original_function"] = self._acompletion
+        self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         request_priority = kwargs.get("priority") or self.default_priority
         start_time = time.time()
@@ -891,8 +892,8 @@
             deployment=deployment, parent_otel_span=parent_otel_span
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
         data = deployment["litellm_params"].copy()
         model_name = data["model"]
         model_client = self._get_async_openai_model_client(
@@ -2790,9 +2791,12 @@ class Router:
             content_policy_fallbacks=content_policy_fallbacks,
         )
-        response = await self.async_function_with_retries(
-            *args, **kwargs, mock_timeout=mock_timeout
-        )
+        if mock_timeout is not None:
+            response = await self.async_function_with_retries(
+                *args, **kwargs, mock_timeout=mock_timeout
+            )
+        else:
+            response = await self.async_function_with_retries(*args, **kwargs)
         verbose_router_logger.debug(f"Async Response: {response}")
         return response
     except Exception as e:
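
Taken together, the mock timeout is now only forwarded when it is actually set, and it is stripped before the real request so fallback attempts are not re-timed-out. A rough usage sketch, assuming the documented Router setup and that mock_timeout is accepted as a per-call kwarg as the hunk above suggests (model names and parameters are placeholders):

    import asyncio
    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "primary", "litellm_params": {"model": "openai/gpt-4o"}},
            {"model_name": "backup", "litellm_params": {"model": "openai/gpt-4o-mini"}},
        ],
        fallbacks=[{"primary": ["backup"]}],
    )

    async def main():
        # mock_timeout simulates a timeout on the first attempt so the fallback
        # path runs; with this fix it is not re-applied to the "backup" call.
        response = await router.acompletion(
            model="primary",
            messages=[{"role": "user", "content": "hello"}],
            mock_timeout=True,
            timeout=1,
        )
        print(response)

    asyncio.run(main())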