LiteLLM Minor Fixes & Improvements (2024/16/01) (#7826)

* fix(lm_studio/chat/transformation.py): Fix https://github.com/BerriAI/litellm/issues/7811

* fix(router.py): fix mock timeout check

* fix: drop model name from fallback args since it causes a conflict with the model=model that is provided later on. (#7806)

This error happens if you provide multiple fallback models to the completion function, each with its own model name defined.
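
A minimal sketch of the conflict, assuming fallback entries are plain kwarg dicts and the wrapper re-invokes the completion function with an explicit model argument (completion_fn and run_fallback are illustrative names, not litellm internals):

    def run_fallback(completion_fn, model: str, fallback_kwargs: dict, **kwargs):
        # If fallback_kwargs still carries its own "model" key, the call below
        # raises: TypeError: got multiple values for keyword argument 'model'.
        # Dropping it lets the explicit model=model win.
        fallback_kwargs = {k: v for k, v in fallback_kwargs.items() if k != "model"}
        return completion_fn(model=model, **fallback_kwargs, **kwargs)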

* fix(router.py): remove mock_timeout from kwargs before sending the request

prevents the mock timeout from being reused in fallback calls
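
A minimal sketch of the idea, assuming per-call kwargs are reused when the router falls back to the next deployment (the function and client names here are illustrative, not the actual Router internals):

    import asyncio

    async def call_deployment(client, **kwargs):
        # Pop mock_timeout so the simulated timeout only applies to this attempt;
        # if it stayed in kwargs, every fallback attempt would also time out.
        mock_timeout = kwargs.pop("mock_timeout", None)
        if mock_timeout is not None:
            await asyncio.sleep(kwargs.get("timeout", 0))
            raise TimeoutError("mock timeout triggered")
        return await client.chat.completions.create(**kwargs)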

* test: update test

* test: revert test change - wrong pr

---------

Co-authored-by: Dudu Lasry <david1542@users.noreply.github.com>
Krish Dholakia, 2025-01-17 20:59:21 -08:00, committed by GitHub
commit 1bea338597 (parent a2762fb273)
7 changed files with 40 additions and 9 deletions

litellm/router.py

@@ -810,6 +810,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["stream"] = stream
         kwargs["original_function"] = self._acompletion
+        self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         request_priority = kwargs.get("priority") or self.default_priority
         start_time = time.time()
@@ -891,8 +892,8 @@
             deployment=deployment, parent_otel_span=parent_otel_span
         )
         self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
         data = deployment["litellm_params"].copy()
         model_name = data["model"]
         model_client = self._get_async_openai_model_client(
@@ -2790,9 +2791,12 @@ class Router:
             content_policy_fallbacks=content_policy_fallbacks,
         )
-        response = await self.async_function_with_retries(
-            *args, **kwargs, mock_timeout=mock_timeout
-        )
+        if mock_timeout is not None:
+            response = await self.async_function_with_retries(
+                *args, **kwargs, mock_timeout=mock_timeout
+            )
+        else:
+            response = await self.async_function_with_retries(*args, **kwargs)
         verbose_router_logger.debug(f"Async Response: {response}")
         return response
     except Exception as e:
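
Taken together, the mock timeout is now only forwarded when it is actually set, and it is stripped before the real request so fallback attempts are not re-timed-out. A rough usage sketch, assuming the documented Router setup and that mock_timeout is accepted as a per-call kwarg as the hunk above suggests (model names and parameters are placeholders):

    import asyncio
    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "primary", "litellm_params": {"model": "openai/gpt-4o"}},
            {"model_name": "backup", "litellm_params": {"model": "openai/gpt-4o-mini"}},
        ],
        fallbacks=[{"primary": ["backup"]}],
    )

    async def main():
        # mock_timeout simulates a timeout on the first attempt so the fallback
        # path runs; with this fix it is not re-applied to the "backup" call.
        response = await router.acompletion(
            model="primary",
            messages=[{"role": "user", "content": "hello"}],
            mock_timeout=True,
            timeout=1,
        )
        print(response)

    asyncio.run(main())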