diff --git a/docs/my-website/docs/providers/lm_studio.md b/docs/my-website/docs/providers/lm_studio.md
index af7247424a..ace138a532 100644
--- a/docs/my-website/docs/providers/lm_studio.md
+++ b/docs/my-website/docs/providers/lm_studio.md
@@ -11,6 +11,14 @@
 https://lmstudio.ai/docs/basics/server
 :::
 
+
+| Property | Details |
+|-------|-------|
+| Description | Discover, download, and run local LLMs. |
+| Provider Route on LiteLLM | `lm_studio/` |
+| Provider Doc | [LM Studio ↗](https://lmstudio.ai/docs/api/openai-api) |
+| Supported OpenAI Endpoints | `/chat/completions`, `/embeddings`, `/completions` |
+
 ## API Key
 ```python
 # env variable
@@ -42,7 +50,7 @@ print(response)
 from litellm import completion
 import os
 
-os.environ['XAI_API_KEY'] = ""
+os.environ['LM_STUDIO_API_KEY'] = ""
 response = completion(
     model="lm_studio/llama-3-8b-instruct",
     messages=[
@@ -131,3 +139,17 @@ Here's how to call a XAI model with the LiteLLM Proxy Server
 ## Supported Parameters
 
 See [Supported Parameters](../completion/input.md#translated-openai-params) for supported parameters.
+
+## Embedding
+
+```python
+from litellm import embedding
+import os
+
+os.environ['LM_STUDIO_API_BASE'] = "http://localhost:8000"
+response = embedding(
+    model="lm_studio/jina-embeddings-v3",
+    input=["Hello world"],
+)
+print(response)
+```
diff --git a/docs/my-website/docs/proxy/self_serve.md b/docs/my-website/docs/proxy/self_serve.md
index 494d9e60db..3eb3d6df89 100644
--- a/docs/my-website/docs/proxy/self_serve.md
+++ b/docs/my-website/docs/proxy/self_serve.md
@@ -196,6 +196,41 @@ This budget does not apply to keys created under non-default teams.
 
 [**Go Here**](./team_budgets.md)
 
+### Auto-add SSO users to teams
+
+1. Specify the JWT field that contains the team IDs the user belongs to.
+
+```yaml
+general_settings:
+  master_key: sk-1234
+  litellm_jwtauth:
+    team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
+```
+
+This assumes your SSO token looks like this:
+```
+{
+  ...,
+  "groups": ["team_id_1", "team_id_2"]
+}
+```
+
+2. Create the teams on LiteLLM
+
+```bash
+curl -X POST '<PROXY_BASE_URL>/team/new' \
+-H 'Authorization: Bearer <MASTER_KEY>' \
+-H 'Content-Type: application/json' \
+-d '{
+    "team_alias": "team_1",
+    "team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
+}'
+```
+
+3. Test the SSO flow
+
+Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)
+
 ## **All Settings for Self Serve / SSO Flow**
 
 ```yaml
diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py
index b285bfc4b6..ba8cb167c8 100644
--- a/litellm/litellm_core_utils/streaming_handler.py
+++ b/litellm/litellm_core_utils/streaming_handler.py
@@ -1,4 +1,5 @@
 import asyncio
+import collections.abc
 import json
 import threading
 import time
@@ -34,6 +35,19 @@
 MAX_THREADS = 100
 executor = ThreadPoolExecutor(max_workers=MAX_THREADS)
 
+def is_async_iterable(obj: Any) -> bool:
+    """
+    Check if an object is an async iterable (can be used with 'async for').
+
+    Args:
+        obj: Any Python object to check
+
+    Returns:
+        bool: True if the object is async iterable, False otherwise
+    """
+    return isinstance(obj, collections.abc.AsyncIterable)
+
+
 def print_verbose(print_statement):
     try:
         if litellm.set_verbose:
@@ -1530,36 +1544,7 @@ class CustomStreamWrapper:
             if self.completion_stream is None:
                 await self.fetch_stream()
 
-            if (
-                self.custom_llm_provider == "openai"
-                or self.custom_llm_provider == "azure"
-                or self.custom_llm_provider == "custom_openai"
-                or self.custom_llm_provider == "text-completion-openai"
-                or self.custom_llm_provider == "text-completion-codestral"
-                or self.custom_llm_provider == "azure_text"
-                or self.custom_llm_provider == "cohere_chat"
-                or self.custom_llm_provider == "cohere"
-                or self.custom_llm_provider == "anthropic"
-                or self.custom_llm_provider == "anthropic_text"
-                or self.custom_llm_provider == "huggingface"
-                or self.custom_llm_provider == "ollama"
-                or self.custom_llm_provider == "ollama_chat"
-                or self.custom_llm_provider == "vertex_ai"
-                or self.custom_llm_provider == "vertex_ai_beta"
-                or self.custom_llm_provider == "sagemaker"
-                or self.custom_llm_provider == "sagemaker_chat"
-                or self.custom_llm_provider == "gemini"
-                or self.custom_llm_provider == "replicate"
-                or self.custom_llm_provider == "cached_response"
-                or self.custom_llm_provider == "predibase"
-                or self.custom_llm_provider == "databricks"
-                or self.custom_llm_provider == "bedrock"
-                or self.custom_llm_provider == "triton"
-                or self.custom_llm_provider == "watsonx"
-                or self.custom_llm_provider == "cloudflare"
-                or self.custom_llm_provider in litellm.openai_compatible_providers
-                or self.custom_llm_provider in litellm._custom_providers
-            ):
+            if is_async_iterable(self.completion_stream):
                 async for chunk in self.completion_stream:
                     if chunk == "None" or chunk is None:
                         raise Exception
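The streaming change above replaces the hand-maintained provider allow-list in `CustomStreamWrapper` with a capability check: any `completion_stream` that supports `async for` takes the async path, regardless of provider name. Below is a minimal standalone sketch (not part of the patch) of what `isinstance(obj, collections.abc.AsyncIterable)` does and does not accept; the `chunk_stream` generator is a made-up stand-in for a provider stream.

```python
import asyncio
import collections.abc
from typing import Any


def is_async_iterable(obj: Any) -> bool:
    # Same check as the new helper: anything exposing __aiter__
    # (async generators, SDK stream objects, ...) passes.
    return isinstance(obj, collections.abc.AsyncIterable)


async def chunk_stream():
    # Hypothetical stand-in for a provider's streamed chunks.
    for chunk in ("hello", " ", "world"):
        yield chunk


async def main():
    stream = chunk_stream()
    assert is_async_iterable(stream)          # async generator -> True
    assert not is_async_iterable(["a", "b"])  # plain list -> False

    if is_async_iterable(stream):
        async for chunk in stream:
            print(chunk, end="")
    print()


asyncio.run(main())
```

One practical upshot: adding a new provider should no longer require touching this dispatch, as long as its stream object is async iterable.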
diff --git a/litellm/main.py b/litellm/main.py
index 054418e909..c97188b452 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3218,6 +3218,7 @@ def embedding( # noqa: PLR0915
         api_base=api_base,
         api_key=api_key,
     )
+
     if dynamic_api_key is not None:
         api_key = dynamic_api_key
 
@@ -3395,18 +3396,19 @@ def embedding( # noqa: PLR0915
         custom_llm_provider == "openai_like"
        or custom_llm_provider == "jina_ai"
        or custom_llm_provider == "hosted_vllm"
+        or custom_llm_provider == "lm_studio"
     ):
         api_base = (
             api_base or litellm.api_base or get_secret_str("OPENAI_LIKE_API_BASE")
         )
 
         # set API KEY
-        api_key = (
-            api_key
-            or litellm.api_key
-            or litellm.openai_like_key
-            or get_secret_str("OPENAI_LIKE_API_KEY")
-        )
+        if api_key is None:
+            api_key = (
+                litellm.api_key
+                or litellm.openai_like_key
+                or get_secret_str("OPENAI_LIKE_API_KEY")
+            )
 
         ## EMBEDDING CALL
         response = openai_like_embedding.embedding(
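The `embedding()` change above swaps an `or`-chain for an explicit `None` check when resolving the key for OpenAI-compatible backends (now including `lm_studio`). A simplified sketch of the behavioral difference, reduced to just the env-var fallback (`litellm.api_key` and `litellm.openai_like_key` are omitted for brevity):

```python
import os
from typing import Optional


def resolve_api_key_old(api_key: Optional[str]) -> Optional[str]:
    # Previous behavior: any falsy value (None or "") falls through to the env var.
    return api_key or os.environ.get("OPENAI_LIKE_API_KEY")


def resolve_api_key_new(api_key: Optional[str]) -> Optional[str]:
    # New behavior: the fallback only applies when no key was passed at all.
    if api_key is None:
        api_key = os.environ.get("OPENAI_LIKE_API_KEY")
    return api_key


os.environ["OPENAI_LIKE_API_KEY"] = "sk-from-env"
print(resolve_api_key_old(""))    # sk-from-env -> an empty key was silently replaced
print(resolve_api_key_new(""))    # (empty)     -> an explicit empty key is preserved
print(resolve_api_key_new(None))  # sk-from-env -> fallback still applies when nothing is passed
```

An explicitly passed empty key is no longer overridden by a global or environment value, which is the kind of difference that matters for local servers such as LM Studio that do not require a real key.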
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 8694d648c2..c9762c36e7 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -25,10 +25,15 @@ model_list:
       identifier: deepseek-ai/DeepSeek-V3-Base
       revision: main
       auth_token: os.environ/HUGGINGFACE_API_KEY
+  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
+    litellm_params:
+      model: watsonx_text/ibm/granite-13b-chat-v2
+    model_info:
+      input_cost_per_token: 0.0000006
+      output_cost_per_token: 0.0000006
 
 # litellm_settings:
 #   key_generation_settings:
 #     personal_key_generation: # maps to 'Default Team' on UI
 #       allowed_user_roles: ["proxy_admin"]
 
-
diff --git a/tests/local_testing/test_embedding.py b/tests/local_testing/test_embedding.py
index 6886f4892d..4aedc00871 100644
--- a/tests/local_testing/test_embedding.py
+++ b/tests/local_testing/test_embedding.py
@@ -1019,6 +1019,28 @@ def test_hosted_vllm_embedding(monkeypatch):
     assert json_data["model"] == "jina-embeddings-v3"
 
 
+def test_lm_studio_embedding(monkeypatch):
+    monkeypatch.setenv("LM_STUDIO_API_BASE", "http://localhost:8000")
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+    with patch.object(client, "post") as mock_post:
+        try:
+            embedding(
+                model="lm_studio/jina-embeddings-v3",
+                input=["Hello world"],
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        mock_post.assert_called_once()
+
+        json_data = json.loads(mock_post.call_args.kwargs["data"])
+        assert json_data["input"] == ["Hello world"]
+        assert json_data["model"] == "jina-embeddings-v3"
+
+
 @pytest.mark.parametrize(
     "model",
     [