LiteLLM Minor Fixes & Improvements (2025/01/16) (#7826)

* fix(lm_studio/chat/transformation.py): Fix https://github.com/BerriAI/litellm/issues/7811
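
A hedged sketch of the fixed path (the model name is taken from this PR's test; the local endpoint is an assumption): call an lm_studio/ model with no LM_STUDIO_API_KEY set, which previously resolved to an empty-string key.

import os
import litellm

# LM Studio serves locally and needs no API key; before this fix the
# missing key resolved to "" and could be discarded by later lookups.
os.environ.pop("LM_STUDIO_API_KEY", None)

response = litellm.completion(
    model="lm_studio/typhoon2-quen2.5-7b-instruct",
    messages=[{"role": "user", "content": "Hello"}],
    api_base="http://localhost:1234/v1",  # assumed LM Studio default address
)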

* fix(router.py): fix mock timeout check

* fix: drop the model name from fallback args, since it conflicts with the model=model argument provided later on (#7806)

This error happens when multiple fallback models are passed to the completion function, each with its own model name defined; see the sketch below.
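
A sketch of the call shape that hits this (model names are placeholders): each dict fallback carries its own "model" key, which used to be merged back into the completion kwargs and collide with the explicit model argument.

import litellm

response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hi"}],
    fallbacks=[
        {"model": "gpt-4o-mini"},
        {"model": "gpt-3.5-turbo"},
    ],
)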

* fix(router.py): remove mock_timeout from kwargs before sending the request

prevents it from being reused in fallbacks; see the sketch below
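
Illustrative pattern only, not the router's exact code: consume mock_timeout out of kwargs at dispatch time so a fallback attempt that reuses the same kwargs does not inherit it.

def dispatch(send, **kwargs):
    # Pop rather than get: the key must not survive in kwargs that a
    # fallback attempt may reuse.
    mock_timeout = kwargs.pop("mock_timeout", None)
    if mock_timeout is not None:
        return send(mock_timeout=mock_timeout, **kwargs)
    return send(**kwargs)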

* test: update test

* test: revert test change - wrong PR

---------

Co-authored-by: Dudu Lasry <david1542@users.noreply.github.com>
commit 1bea338597 (parent a2762fb273)
Author: Krish Dholakia
Date: 2025-01-17 20:59:21 -08:00 (committed by GitHub)
7 changed files with 40 additions and 9 deletions


@@ -40,7 +40,7 @@ async def async_completion_with_fallbacks(**kwargs):
         # Handle dictionary fallback configurations
         if isinstance(fallback, dict):
-            model = fallback.get("model", original_model)
+            model = fallback.pop("model", original_model)
             completion_kwargs.update(fallback)
         else:
             model = fallback
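
Why .pop() over .get(): with .get(), the "model" key stays in the fallback dict, is merged into completion_kwargs, and then collides with the explicit model=model argument. A standalone illustration with a hypothetical call():

def call(model, **completion_kwargs):
    return model, completion_kwargs

# Old behavior: .get() leaves "model" in the fallback dict.
fallback = {"model": "gpt-4o-mini", "temperature": 0}
model = fallback.get("model")
completion_kwargs = {}
completion_kwargs.update(fallback)
# call(model=model, **completion_kwargs)  # would raise:
# TypeError: call() got multiple values for keyword argument 'model'

# New behavior: .pop() removes the key before the merge.
fallback = {"model": "gpt-4o-mini", "temperature": 0}
model = fallback.pop("model")
completion_kwargs = {}
completion_kwargs.update(fallback)
call(model=model, **completion_kwargs)  # no collision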


@@ -384,6 +384,7 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
     dynamic_api_key: Optional[str]
     api_base: Optional[str]
     """
+    custom_llm_provider = model.split("/", 1)[0]
     model = model.split("/", 1)[1]
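
The split in action, using the LM Studio model from this PR's test:

model = "lm_studio/typhoon2-quen2.5-7b-instruct"
custom_llm_provider = model.split("/", 1)[0]  # "lm_studio"
model = model.split("/", 1)[1]  # "typhoon2-quen2.5-7b-instruct"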


@@ -15,6 +15,6 @@ class LMStudioChatConfig(OpenAIGPTConfig):
     ) -> Tuple[Optional[str], Optional[str]]:
         api_base = api_base or get_secret_str("LM_STUDIO_API_BASE")  # type: ignore
         dynamic_api_key = (
-            api_key or get_secret_str("LM_STUDIO_API_KEY") or ""
+            api_key or get_secret_str("LM_STUDIO_API_KEY") or " "
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
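
My reading of the one-character change (an assumption; the diff itself doesn't explain it): an empty string is falsy, so later "key or other_lookup" chains would discard it and go looking for a key that may not exist, while a single space is truthy and survives as a placeholder.

# "" is falsy and gets replaced by the next candidate in an or-chain;
# " " is truthy and is kept as the placeholder key.
assert ("" or "key-from-env") == "key-from-env"
assert (" " or "key-from-env") == " "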


@@ -358,6 +358,7 @@ class OpenAIChatCompletion(BaseLLM):
                     organization=organization,
                 )
             else:
                 _new_client = OpenAI(
                     api_key=api_key,
                     base_url=api_base,


@@ -1,5 +1,13 @@
 model_list:
-  - model_name: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+  - model_name: openai-gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+      model: openai/my-fake-openai-endpoint
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: openai-o1
+    litellm_params:
+      model: openai/o1
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+      mock_timeout: true
+      timeout: 3
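
With a proxy running on this config, a request to openai-o1 should be forced to time out after roughly 3 seconds. A hedged client sketch; the local port and the error class the proxy surfaces are assumptions.

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

try:
    client.chat.completions.create(
        model="openai-o1",
        messages=[{"role": "user", "content": "Hello"}],
    )
except openai.APIError as e:
    # Expected: the mocked ~3s timeout comes back as an error.
    print("mock timeout surfaced:", e)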


@@ -810,6 +810,7 @@ class Router:
             kwargs["messages"] = messages
             kwargs["stream"] = stream
             kwargs["original_function"] = self._acompletion
+            self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
             request_priority = kwargs.get("priority") or self.default_priority
             start_time = time.time()
@@ -891,8 +892,8 @@
                 deployment=deployment, parent_otel_span=parent_otel_span
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
             data = deployment["litellm_params"].copy()
             model_name = data["model"]
             model_client = self._get_async_openai_model_client(
@@ -2790,9 +2791,12 @@
                 content_policy_fallbacks=content_policy_fallbacks,
             )
+            if mock_timeout is not None:
+                response = await self.async_function_with_retries(
+                    *args, **kwargs, mock_timeout=mock_timeout
+                )
+            else:
+                response = await self.async_function_with_retries(*args, **kwargs)
             verbose_router_logger.debug(f"Async Response: {response}")
             return response
         except Exception as e:
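
The same pattern in isolation (names are illustrative): forward the optional knob explicitly instead of leaving it inside **kwargs, so retry and fallback paths that rebuild kwargs don't silently re-apply it.

async def run(fn, *args, mock_timeout=None, **kwargs):
    # Only pass mock_timeout through when the caller actually set it.
    if mock_timeout is not None:
        return await fn(*args, **kwargs, mock_timeout=mock_timeout)
    return await fn(*args, **kwargs)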


@@ -1756,6 +1756,23 @@ async def test_openai_compatible_custom_api_base(provider):
     assert "hello" in mock_call.call_args.kwargs["extra_body"]
+
+def test_lm_studio_completion(monkeypatch):
+    monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    try:
+        completion(
+            model="lm_studio/typhoon2-quen2.5-7b-instruct",
+            messages=[
+                {"role": "user", "content": "What's the weather like in San Francisco?"}
+            ],
+            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
+        )
+    except litellm.AuthenticationError as e:
+        pytest.fail(f"Error occurred: {e}")
+    except litellm.APIError as e:
+        print(e)
+
 @pytest.mark.asyncio
 async def test_litellm_gateway_from_sdk():
     litellm.set_verbose = True