forked from phoenix/litellm-mirror

fix(main.py): fix together ai text completion call

parent 59080431b8
commit a854824c02

3 changed files with 29 additions and 1 deletion
@@ -2950,7 +2950,9 @@ def embedding(
                 model=model,  # type: ignore
                 llm_provider="ollama",  # type: ignore
             )
-            ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+            ollama_embeddings_fn = (
+                ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+            )
             response = ollama_embeddings_fn(
                 api_base=api_base,
                 model=model,
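The change in this hunk is cosmetic: the ollama_embeddings_fn assignment, which selects the async or the sync Ollama embeddings implementation from the aembedding flag, is re-wrapped for line length. A minimal sketch of that select-then-call pattern, not part of the commit (the fetch names are illustrative, not litellm functions):

import asyncio

async def fetch_async(text: str) -> list:
    # Stands in for an async embeddings call.
    return [float(len(text))]

def fetch_sync(text: str) -> list:
    # Stands in for a blocking embeddings call.
    return [float(len(text))]

async def embed(text: str, use_async: bool) -> list:
    # Select the implementation once, then share a single call site,
    # mirroring the ollama_embeddings_fn assignment in the hunk.
    embeddings_fn = fetch_async if use_async else fetch_sync
    result = embeddings_fn(text)
    # The async variant returns a coroutine that still needs awaiting.
    if asyncio.iscoroutine(result):
        result = await result
    return result

print(asyncio.run(embed("good morning", use_async=True)))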
@@ -3094,6 +3096,7 @@ async def atext_completion(*args, **kwargs):
            or custom_llm_provider == "huggingface"
            or custom_llm_provider == "ollama"
            or custom_llm_provider == "vertex_ai"
+           or custom_llm_provider in litellm.openai_compatible_providers
        ):  # currently implemented aiohttp calls for just azure and openai, soon all.
            # Await normally
            response = await loop.run_in_executor(None, func_with_context)
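This is the core of the Together AI fix: providers in litellm.openai_compatible_providers (the group Together AI's OpenAI-style endpoint is handled under, per the commit title) now take this branch, so their synchronous text completion call runs on a worker thread instead of blocking the event loop. A minimal sketch of the run_in_executor pattern this branch relies on, not part of the commit (blocking_completion is an illustrative stand-in):

import asyncio
import functools

def blocking_completion(prompt: str) -> str:
    # Stands in for a synchronous provider SDK / HTTP call.
    return f"completion for: {prompt}"

async def acompletion(prompt: str) -> str:
    loop = asyncio.get_running_loop()
    # Bind the arguments up front, mirroring func_with_context in the diff.
    func_with_context = functools.partial(blocking_completion, prompt)
    # None selects the default thread pool; the event loop keeps running
    # while the blocking call executes on a worker thread.
    return await loop.run_in_executor(None, func_with_context)

print(asyncio.run(acompletion("good morning")))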
@@ -3124,6 +3127,8 @@ async def atext_completion(*args, **kwargs):
        ## TRANSLATE CHAT TO TEXT FORMAT ##
+       if isinstance(response, TextCompletionResponse):
+           return response
        elif asyncio.iscoroutine(response):
            response = await response

        text_completion_response = TextCompletionResponse()
        text_completion_response["id"] = response.get("id", None)
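The added isinstance guard lets responses that already arrive as a TextCompletionResponse (as the executor branch above now produces for Together AI) pass through instead of being rebuilt by the chat-to-text translation. A rough sketch of that early return, not part of the commit; the dict-subclass stand-in below only approximates litellm's real response type:

class TextCompletionResponse(dict):
    # Stand-in for litellm's response class; a plain dict subclass is
    # enough to demonstrate the isinstance guard.
    pass

def translate_to_text_format(response: dict) -> TextCompletionResponse:
    # New behavior from the hunk: a response already in text completion
    # format is returned as-is instead of being rebuilt.
    if isinstance(response, TextCompletionResponse):
        return response
    text_completion_response = TextCompletionResponse()
    text_completion_response["id"] = response.get("id", None)
    return text_completion_response

already_text = TextCompletionResponse({"id": "abc"})
assert translate_to_text_format(already_text) is already_text
assert translate_to_text_format({"id": "xyz"})["id"] == "xyz"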
@@ -17,6 +17,9 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - litellm_params:
+      model: together_ai/codellama/CodeLlama-13b-Instruct-hf
+    model_name: CodeLlama-13b-Instruct
 router_settings:
   num_retries: 0
   enable_pre_call_checks: true
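The config change registers the Together AI deployment under the public alias CodeLlama-13b-Instruct, so requests naming the alias are dispatched with the together_ai/... model string. A minimal sketch of that alias lookup, assuming only the YAML above (resolve is illustrative, not the router's actual API):

model_list = [
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-3.5-turbo"}},
    {
        "model_name": "CodeLlama-13b-Instruct",
        "litellm_params": {
            "model": "together_ai/codellama/CodeLlama-13b-Instruct-hf"
        },
    },
]

def resolve(model_name: str) -> dict:
    # Return provider params for the first deployment whose public alias
    # matches the requested name; the real router also load-balances
    # across deployments that share an alias.
    for deployment in model_list:
        if deployment["model_name"] == model_name:
            return deployment["litellm_params"]
    raise KeyError(f"no deployment named {model_name!r}")

assert resolve("CodeLlama-13b-Instruct")["model"].startswith("together_ai/")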
@@ -3990,6 +3990,26 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


+def test_async_text_completion_together_ai():
+    litellm.set_verbose = True
+    print("test_async_text_completion")
+
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
+                prompt="good morning",
+                max_tokens=10,
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            pytest.fail("An unexpected error occurred")
+
+    asyncio.run(test_get_response())
+
+
 # test_async_text_completion()
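The new test can be selected by name with pytest's -k filter (for example, pytest -k test_async_text_completion_together_ai on whichever test module this hunk belongs to; the diff does not name the file). It makes a live call, so it assumes a Together AI API key in the environment, typically TOGETHERAI_API_KEY for litellm.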