fix(main.py): fix lm_studio/ embedding routing (#7658)

* fix(main.py): fix lm_studio/ embedding routing

adds the provider mapping and updates the docs with an example
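With the mapping in place, an embedding call routed through the lm_studio/ prefix looks like the sketch below. This is a minimal usage sketch, not taken from the docs: the model name is a placeholder, and api_base assumes LM Studio's default local OpenAI-compatible endpoint.

import litellm

# Minimal sketch: the model name is a placeholder; LM Studio's local server
# defaults to an OpenAI-compatible endpoint on http://localhost:1234/v1.
response = litellm.embedding(
    model="lm_studio/text-embedding-nomic-embed-text-v1.5",
    input=["hello from litellm"],
    api_base="http://localhost:1234/v1",
    api_key="lm-studio",  # LM Studio does not validate API keys by default
)
print(len(response.data[0]["embedding"]))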

* docs(self_serve.md): update doc to show how to auto-add SSO users to teams

* fix(streaming_handler.py): simplify the async iterator check to just verify that the streaming response is an async iterable
Krish Dholakia 2025-01-09 23:03:24 -08:00 committed by GitHub
parent 14d4b695df
commit afdcbe3d64
6 changed files with 109 additions and 38 deletions


@@ -1,4 +1,5 @@
import asyncio
import collections.abc
import json
import threading
import time
@@ -34,6 +35,19 @@ MAX_THREADS = 100
executor = ThreadPoolExecutor(max_workers=MAX_THREADS)

def is_async_iterable(obj: Any) -> bool:
    """
    Check if an object is an async iterable (can be used with 'async for').

    Args:
        obj: Any Python object to check

    Returns:
        bool: True if the object is async iterable, False otherwise
    """
    return isinstance(obj, collections.abc.AsyncIterable)
def print_verbose(print_statement):
try:
if litellm.set_verbose:
@@ -1530,36 +1544,7 @@ class CustomStreamWrapper:
if self.completion_stream is None:
await self.fetch_stream()
if (
self.custom_llm_provider == "openai"
or self.custom_llm_provider == "azure"
or self.custom_llm_provider == "custom_openai"
or self.custom_llm_provider == "text-completion-openai"
or self.custom_llm_provider == "text-completion-codestral"
or self.custom_llm_provider == "azure_text"
or self.custom_llm_provider == "cohere_chat"
or self.custom_llm_provider == "cohere"
or self.custom_llm_provider == "anthropic"
or self.custom_llm_provider == "anthropic_text"
or self.custom_llm_provider == "huggingface"
or self.custom_llm_provider == "ollama"
or self.custom_llm_provider == "ollama_chat"
or self.custom_llm_provider == "vertex_ai"
or self.custom_llm_provider == "vertex_ai_beta"
or self.custom_llm_provider == "sagemaker"
or self.custom_llm_provider == "sagemaker_chat"
or self.custom_llm_provider == "gemini"
or self.custom_llm_provider == "replicate"
or self.custom_llm_provider == "cached_response"
or self.custom_llm_provider == "predibase"
or self.custom_llm_provider == "databricks"
or self.custom_llm_provider == "bedrock"
or self.custom_llm_provider == "triton"
or self.custom_llm_provider == "watsonx"
or self.custom_llm_provider == "cloudflare"
or self.custom_llm_provider in litellm.openai_compatible_providers
or self.custom_llm_provider in litellm._custom_providers
):
if is_async_iterable(self.completion_stream):
async for chunk in self.completion_stream:
if chunk == "None" or chunk is None:
raise Exception
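In effect, the per-provider allowlist above is replaced by a structural check: anything implementing __aiter__ is treated as consumable with 'async for'. A standalone sketch (names here are illustrative) of what isinstance(obj, collections.abc.AsyncIterable) accepts and rejects:

import collections.abc

def is_async_iterable(obj) -> bool:
    return isinstance(obj, collections.abc.AsyncIterable)

async def chunk_generator():
    yield "chunk-1"
    yield "chunk-2"

class CustomStream:
    """Any class defining __aiter__ registers as AsyncIterable via the ABC's subclass hook."""
    def __aiter__(self):
        return chunk_generator()

def sync_generator():
    yield "chunk-1"

print(is_async_iterable(chunk_generator()))  # True: async generators define __aiter__
print(is_async_iterable(CustomStream()))     # True: duck-typed via the subclass hook
print(is_async_iterable(sync_generator()))   # False: plain generators are only sync-iterable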