diff --git a/litellm/main.py b/litellm/main.py
index 0d3e67449..81a5981f9 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2950,7 +2950,9 @@ def embedding(
                 model=model,  # type: ignore
                 llm_provider="ollama",  # type: ignore
             )
-        ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        ollama_embeddings_fn = (
+            ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        )
         response = ollama_embeddings_fn(
             api_base=api_base,
             model=model,
@@ -3094,6 +3096,7 @@ async def atext_completion(*args, **kwargs):
             or custom_llm_provider == "huggingface"
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "vertex_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure and openai, soon all.
             # Await normally
             response = await loop.run_in_executor(None, func_with_context)
@@ -3124,6 +3127,8 @@ async def atext_completion(*args, **kwargs):
         ## TRANSLATE CHAT TO TEXT FORMAT ##
         if isinstance(response, TextCompletionResponse):
             return response
+        elif asyncio.iscoroutine(response):
+            response = await response
 
         text_completion_response = TextCompletionResponse()
         text_completion_response["id"] = response.get("id", None)
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index b7293a17f..4ea984611 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -17,6 +17,9 @@ model_list:
 - model_name: gpt-4
   litellm_params:
     model: gpt-3.5-turbo
+- litellm_params:
+    model: together_ai/codellama/CodeLlama-13b-Instruct-hf
+  model_name: CodeLlama-13b-Instruct
 router_settings:
   num_retries: 0
   enable_pre_call_checks: true
diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 0b4103ca0..6a093af23 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -3990,6 +3990,26 @@ def test_async_text_completion():
     asyncio.run(test_get_response())
 
 
+def test_async_text_completion_together_ai():
+    litellm.set_verbose = True
+    print("test_async_text_completion")
+
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
+                prompt="good morning",
+                max_tokens=10,
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            pytest.fail("An unexpected error occurred")
+
+    asyncio.run(test_get_response())
+
+
 # test_async_text_completion()
 
 