diff --git a/litellm/litellm_core_utils/fallback_utils.py b/litellm/litellm_core_utils/fallback_utils.py
index 852165a830..90c55246e5 100644
--- a/litellm/litellm_core_utils/fallback_utils.py
+++ b/litellm/litellm_core_utils/fallback_utils.py
@@ -40,7 +40,7 @@ async def async_completion_with_fallbacks(**kwargs):
 
             # Handle dictionary fallback configurations
             if isinstance(fallback, dict):
-                model = fallback.get("model", original_model)
+                model = fallback.pop("model", original_model)
                 completion_kwargs.update(fallback)
             else:
                 model = fallback
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 4c9e1b2a04..302865629a 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -384,6 +384,7 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
         dynamic_api_key: Optional[str]
         api_base: Optional[str]
     """
+    custom_llm_provider = model.split("/", 1)[0]
     model = model.split("/", 1)[1]
 
diff --git a/litellm/llms/lm_studio/chat/transformation.py b/litellm/llms/lm_studio/chat/transformation.py
index a4380cc5df..147e8e923f 100644
--- a/litellm/llms/lm_studio/chat/transformation.py
+++ b/litellm/llms/lm_studio/chat/transformation.py
@@ -15,6 +15,6 @@ class LMStudioChatConfig(OpenAIGPTConfig):
     ) -> Tuple[Optional[str], Optional[str]]:
         api_base = api_base or get_secret_str("LM_STUDIO_API_BASE")  # type: ignore
         dynamic_api_key = (
-            api_key or get_secret_str("LM_STUDIO_API_KEY") or ""
+            api_key or get_secret_str("LM_STUDIO_API_KEY") or " "
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index ff40fa1853..c46e86f824 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -358,6 +358,7 @@ class OpenAIChatCompletion(BaseLLM):
                 organization=organization,
             )
         else:
+
             _new_client = OpenAI(
                 api_key=api_key,
                 base_url=api_base,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index bd863ec0c2..eadecbd083 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,13 @@
 model_list:
-  - model_name: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+  - model_name: openai-gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
-
+      model: openai/my-fake-openai-endpoint
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: openai-o1
+    litellm_params:
+      model: openai/o1
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+      mock_timeout: true
+      timeout: 3
diff --git a/litellm/router.py b/litellm/router.py
index 1747672bbb..e0c5026cd1 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -810,6 +810,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["stream"] = stream
         kwargs["original_function"] = self._acompletion
+        self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         request_priority = kwargs.get("priority") or self.default_priority
         start_time = time.time()
 
@@ -891,8 +892,8 @@
                 deployment=deployment, parent_otel_span=parent_otel_span
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
-
             data = deployment["litellm_params"].copy()
+
             model_name = data["model"]
 
             model_client = self._get_async_openai_model_client(
@@ -2790,9 +2791,12 @@
                 content_policy_fallbacks=content_policy_fallbacks,
             )
 
-            response = await self.async_function_with_retries(
-                *args, **kwargs, mock_timeout=mock_timeout
-            )
+            if mock_timeout is not None:
+                response = await self.async_function_with_retries(
+                    *args, **kwargs, mock_timeout=mock_timeout
+                )
+            else:
+                response = await self.async_function_with_retries(*args, **kwargs)
             verbose_router_logger.debug(f"Async Response: {response}")
             return response
         except Exception as e:
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 42e79a59e6..02d91cfb65 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -1756,6 +1756,23 @@ async def test_openai_compatible_custom_api_base(provider):
     assert "hello" in mock_call.call_args.kwargs["extra_body"]
 
 
+def test_lm_studio_completion(monkeypatch):
+    monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    try:
+        completion(
+            model="lm_studio/typhoon2-quen2.5-7b-instruct",
+            messages=[
+                {"role": "user", "content": "What's the weather like in San Francisco?"}
+            ],
+            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
+        )
+    except litellm.AuthenticationError as e:
+        pytest.fail(f"Error occurred: {e}")
+    except litellm.APIError as e:
+        print(e)
+
+
 @pytest.mark.asyncio
 async def test_litellm_gateway_from_sdk():
     litellm.set_verbose = True
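
Usage note (not part of the patch): the fallback_utils.py change above switches from fallback.get("model", ...) to fallback.pop("model", ...), so a dict entry in fallbacks contributes the retry model plus any extra completion kwargs without leaving a duplicate "model" key in the merged kwargs. The sketch below is a minimal illustration of that dict-fallback shape, assuming the fallbacks argument accepted by litellm.acompletion is routed through async_completion_with_fallbacks; the model names, API key, and endpoint URL are placeholders borrowed from the config and test added in this patch, not real services.

    import asyncio

    import litellm


    async def main() -> None:
        # The primary model is expected to fail; the dict fallback supplies its own
        # "model" (popped out by async_completion_with_fallbacks) plus extra kwargs
        # such as api_base, which are merged into the retried completion call.
        response = await litellm.acompletion(
            model="openai/my-fake-openai-endpoint",  # placeholder primary model
            api_key="sk-1234",                       # placeholder key
            messages=[{"role": "user", "content": "hello"}],
            fallbacks=[
                {
                    "model": "lm_studio/typhoon2-quen2.5-7b-instruct",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                }
            ],
        )
        print(response)


    asyncio.run(main())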