forked from phoenix/litellm-mirror
fix(main.py): fix together ai text completion call
parent 59080431b8
commit a854824c02

3 changed files with 29 additions and 1 deletion
@@ -2950,7 +2950,9 @@ def embedding(
                 model=model,  # type: ignore
                 llm_provider="ollama",  # type: ignore
             )
-        ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        ollama_embeddings_fn = (
+            ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        )
         response = ollama_embeddings_fn(
             api_base=api_base,
             model=model,
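The hunk above only reflows the sync/async function selection onto multiple lines; behaviour is unchanged. For context, a minimal sketch of the code path it sits in, calling litellm's embedding entry point against a local Ollama server (the model name and a running Ollama instance are assumptions, not part of this commit):

import litellm

# assumes a local Ollama server with an embedding-capable model already pulled
response = litellm.embedding(
    model="ollama/nomic-embed-text",
    input=["good morning from litellm"],
)
print(response)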
@@ -3094,6 +3096,7 @@ async def atext_completion(*args, **kwargs):
             or custom_llm_provider == "huggingface"
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "vertex_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure and openai, soon all.
             # Await normally
             response = await loop.run_in_executor(None, func_with_context)
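The new condition lets any provider registered as OpenAI-compatible take the awaited path in atext_completion instead of the synchronous fallback. A quick, illustrative check of what that membership test covers at runtime (the exact contents of the list depend on the installed litellm version):

import litellm

print(litellm.openai_compatible_providers)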
@@ -3124,6 +3127,8 @@ async def atext_completion(*args, **kwargs):
         ## TRANSLATE CHAT TO TEXT FORMAT ##
         if isinstance(response, TextCompletionResponse):
             return response
+        elif asyncio.iscoroutine(response):
+            response = await response

         text_completion_response = TextCompletionResponse()
         text_completion_response["id"] = response.get("id", None)
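The new elif covers the case where atext_completion gets back an un-awaited coroutine instead of a finished TextCompletionResponse, so it has to be resolved before its fields are read. A standalone sketch of that guard (illustrative names, not litellm code):

import asyncio

async def provider_call():
    # stands in for a provider path that returns a coroutine
    return {"id": "cmpl-123", "choices": []}

async def handle(response):
    if asyncio.iscoroutine(response):
        response = await response  # resolve it before reading fields
    return response.get("id", None)

print(asyncio.run(handle(provider_call())))  # -> cmpl-123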
@@ -17,6 +17,9 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - litellm_params:
+      model: together_ai/codellama/CodeLlama-13b-Instruct-hf
+    model_name: CodeLlama-13b-Instruct
 router_settings:
   num_retries: 0
   enable_pre_call_checks: true
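With a litellm proxy started from the config above, the new model_name alias can be exercised through any OpenAI-compatible client. A hedged sketch (the proxy URL, port, and client key are placeholders, and the proxy itself needs TOGETHERAI_API_KEY set):

import openai

client = openai.OpenAI(base_url="http://0.0.0.0:8000", api_key="sk-anything")
completion = client.completions.create(
    model="CodeLlama-13b-Instruct",
    prompt="good morning",
    max_tokens=10,
)
print(completion)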
@@ -3990,6 +3990,26 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


+def test_async_text_completion_together_ai():
+    litellm.set_verbose = True
+    print("test_async_text_completion")
+
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
+                prompt="good morning",
+                max_tokens=10,
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            pytest.fail("An unexpected error occurred")
+
+    asyncio.run(test_get_response())
+
+
 # test_async_text_completion()
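The new test calls the live Together AI API, so it needs TOGETHERAI_API_KEY in the environment. One way to run just this test from the repository's test directory (the invocation below is a suggestion, not part of the commit):

import pytest

pytest.main(["-k", "test_async_text_completion_together_ai", "-s"])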