diff --git a/litellm/litellm_core_utils/fallback_utils.py b/litellm/litellm_core_utils/fallback_utils.py
index 852165a830..90c55246e5 100644
--- a/litellm/litellm_core_utils/fallback_utils.py
+++ b/litellm/litellm_core_utils/fallback_utils.py
@@ -40,7 +40,7 @@ async def async_completion_with_fallbacks(**kwargs):
 
             # Handle dictionary fallback configurations
             if isinstance(fallback, dict):
-                model = fallback.get("model", original_model)
+                model = fallback.pop("model", original_model)
                 completion_kwargs.update(fallback)
             else:
                 model = fallback
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 4c9e1b2a04..302865629a 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -384,6 +384,7 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
         dynamic_api_key: Optional[str]
         api_base: Optional[str]
     """
+    custom_llm_provider = model.split("/", 1)[0]
     model = model.split("/", 1)[1]
 
diff --git a/litellm/llms/lm_studio/chat/transformation.py b/litellm/llms/lm_studio/chat/transformation.py
index a4380cc5df..147e8e923f 100644
--- a/litellm/llms/lm_studio/chat/transformation.py
+++ b/litellm/llms/lm_studio/chat/transformation.py
@@ -15,6 +15,6 @@ class LMStudioChatConfig(OpenAIGPTConfig):
     ) -> Tuple[Optional[str], Optional[str]]:
         api_base = api_base or get_secret_str("LM_STUDIO_API_BASE")  # type: ignore
         dynamic_api_key = (
-            api_key or get_secret_str("LM_STUDIO_API_KEY") or ""
+            api_key or get_secret_str("LM_STUDIO_API_KEY") or " "
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index ff40fa1853..c46e86f824 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -358,6 +358,7 @@ class OpenAIChatCompletion(BaseLLM):
                 organization=organization,
             )
         else:
+
             _new_client = OpenAI(
                 api_key=api_key,
                 base_url=api_base,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index bd863ec0c2..eadecbd083 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,13 @@
 model_list:
-  - model_name: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+  - model_name: openai-gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
-
+      model: openai/my-fake-openai-endpoint
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: openai-o1
+    litellm_params:
+      model: openai/o1
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+      mock_timeout: true
+      timeout: 3
diff --git a/litellm/router.py b/litellm/router.py
index 1747672bbb..e0c5026cd1 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -810,6 +810,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["stream"] = stream
         kwargs["original_function"] = self._acompletion
+        self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         request_priority = kwargs.get("priority") or self.default_priority
         start_time = time.time()
 
@@ -891,8 +892,8 @@
                 deployment=deployment, parent_otel_span=parent_otel_span
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
-
             data = deployment["litellm_params"].copy()
+
             model_name = data["model"]
 
             model_client = self._get_async_openai_model_client(
@@ -2790,9 +2791,12 @@
                 content_policy_fallbacks=content_policy_fallbacks,
             )
 
-            response = await self.async_function_with_retries(
-                *args, **kwargs, mock_timeout=mock_timeout
-            )
+            if mock_timeout is not None:
+                response = await self.async_function_with_retries(
+                    *args, **kwargs, mock_timeout=mock_timeout
+                )
+            else:
+                response = await self.async_function_with_retries(*args, **kwargs)
             verbose_router_logger.debug(f"Async Response: {response}")
             return response
         except Exception as e:
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 42e79a59e6..02d91cfb65 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -1756,6 +1756,23 @@ async def test_openai_compatible_custom_api_base(provider):
     assert "hello" in mock_call.call_args.kwargs["extra_body"]
 
 
+def test_lm_studio_completion(monkeypatch):
+    monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    try:
+        completion(
+            model="lm_studio/typhoon2-quen2.5-7b-instruct",
+            messages=[
+                {"role": "user", "content": "What's the weather like in San Francisco?"}
+            ],
+            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
+        )
+    except litellm.AuthenticationError as e:
+        pytest.fail(f"Error occurred: {e}")
+    except litellm.APIError as e:
+        print(e)
+
+
 @pytest.mark.asyncio
 async def test_litellm_gateway_from_sdk():
     litellm.set_verbose = True
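
Usage note (not part of the patch): the fallback_utils.py change above switches from fallback.get("model", ...) to fallback.pop("model", ...), so a dict entry in fallbacks contributes the retry model plus any extra completion kwargs without leaving a duplicate "model" key in the merged kwargs. The sketch below is a minimal illustration of that dict-fallback shape, assuming the fallbacks argument accepted by litellm.acompletion is routed through async_completion_with_fallbacks; the model names, API key, and endpoint URL are placeholders borrowed from the config and test added in this patch, not real services.

    import asyncio

    import litellm


    async def main() -> None:
        # The primary model is expected to fail; the dict fallback supplies its own
        # "model" (popped out by async_completion_with_fallbacks) plus extra kwargs
        # such as api_base, which are merged into the retried completion call.
        response = await litellm.acompletion(
            model="openai/my-fake-openai-endpoint",  # placeholder primary model
            api_key="sk-1234",                       # placeholder key
            messages=[{"role": "user", "content": "hello"}],
            fallbacks=[
                {
                    "model": "lm_studio/typhoon2-quen2.5-7b-instruct",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                }
            ],
        )
        print(response)


    asyncio.run(main())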