forked from phoenix/litellm-mirror

fix(main.py): fix together ai text completion call

parent 59080431b8
commit a854824c02

3 changed files with 29 additions and 1 deletion
@@ -2950,7 +2950,9 @@ def embedding(
                 model=model,  # type: ignore
                 llm_provider="ollama",  # type: ignore
             )
-            ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+            ollama_embeddings_fn = (
+                ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+            )
             response = ollama_embeddings_fn(
                 api_base=api_base,
                 model=model,
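The change in this hunk is cosmetic: the ollama_embeddings_fn assignment, which selects the async or the sync Ollama embeddings implementation from the aembedding flag, is re-wrapped for line length. A minimal sketch of that select-then-call pattern, not part of the commit (the fetch names are illustrative, not litellm functions):

import asyncio

async def fetch_async(text: str) -> list:
    # Stands in for an async embeddings call.
    return [float(len(text))]

def fetch_sync(text: str) -> list:
    # Stands in for a blocking embeddings call.
    return [float(len(text))]

async def embed(text: str, use_async: bool) -> list:
    # Select the implementation once, then share a single call site,
    # mirroring the ollama_embeddings_fn assignment in the hunk.
    embeddings_fn = fetch_async if use_async else fetch_sync
    result = embeddings_fn(text)
    # The async variant returns a coroutine that still needs awaiting.
    if asyncio.iscoroutine(result):
        result = await result
    return result

print(asyncio.run(embed("good morning", use_async=True)))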
@@ -3094,6 +3096,7 @@ async def atext_completion(*args, **kwargs):
            or custom_llm_provider == "huggingface"
            or custom_llm_provider == "ollama"
            or custom_llm_provider == "vertex_ai"
+           or custom_llm_provider in litellm.openai_compatible_providers
        ):  # currently implemented aiohttp calls for just azure and openai, soon all.
            # Await normally
            response = await loop.run_in_executor(None, func_with_context)
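This is the core of the Together AI fix: providers in litellm.openai_compatible_providers (the group Together AI's OpenAI-style endpoint is handled under, per the commit title) now take this branch, so their synchronous text completion call runs on a worker thread instead of blocking the event loop. A minimal sketch of the run_in_executor pattern this branch relies on, not part of the commit (blocking_completion is an illustrative stand-in):

import asyncio
import functools

def blocking_completion(prompt: str) -> str:
    # Stands in for a synchronous provider SDK / HTTP call.
    return f"completion for: {prompt}"

async def acompletion(prompt: str) -> str:
    loop = asyncio.get_running_loop()
    # Bind the arguments up front, mirroring func_with_context in the diff.
    func_with_context = functools.partial(blocking_completion, prompt)
    # None selects the default thread pool; the event loop keeps running
    # while the blocking call executes on a worker thread.
    return await loop.run_in_executor(None, func_with_context)

print(asyncio.run(acompletion("good morning")))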
@@ -3124,6 +3127,8 @@ async def atext_completion(*args, **kwargs):
        ## TRANSLATE CHAT TO TEXT FORMAT ##
+       if isinstance(response, TextCompletionResponse):
+           return response
        elif asyncio.iscoroutine(response):
            response = await response

        text_completion_response = TextCompletionResponse()
        text_completion_response["id"] = response.get("id", None)
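The added isinstance guard lets responses that already arrive as a TextCompletionResponse (as the executor branch above now produces for Together AI) pass through instead of being rebuilt by the chat-to-text translation. A rough sketch of that early return, not part of the commit; the dict-subclass stand-in below only approximates litellm's real response type:

class TextCompletionResponse(dict):
    # Stand-in for litellm's response class; a plain dict subclass is
    # enough to demonstrate the isinstance guard.
    pass

def translate_to_text_format(response: dict) -> TextCompletionResponse:
    # New behavior from the hunk: a response already in text completion
    # format is returned as-is instead of being rebuilt.
    if isinstance(response, TextCompletionResponse):
        return response
    text_completion_response = TextCompletionResponse()
    text_completion_response["id"] = response.get("id", None)
    return text_completion_response

already_text = TextCompletionResponse({"id": "abc"})
assert translate_to_text_format(already_text) is already_text
assert translate_to_text_format({"id": "xyz"})["id"] == "xyz"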
@@ -17,6 +17,9 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - litellm_params:
+      model: together_ai/codellama/CodeLlama-13b-Instruct-hf
+    model_name: CodeLlama-13b-Instruct
 router_settings:
   num_retries: 0
   enable_pre_call_checks: true
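The config change registers the Together AI deployment under the public alias CodeLlama-13b-Instruct, so requests naming the alias are dispatched with the together_ai/... model string. A minimal sketch of that alias lookup, assuming only the YAML above (resolve is illustrative, not the router's actual API):

model_list = [
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-3.5-turbo"}},
    {
        "model_name": "CodeLlama-13b-Instruct",
        "litellm_params": {
            "model": "together_ai/codellama/CodeLlama-13b-Instruct-hf"
        },
    },
]

def resolve(model_name: str) -> dict:
    # Return provider params for the first deployment whose public alias
    # matches the requested name; the real router also load-balances
    # across deployments that share an alias.
    for deployment in model_list:
        if deployment["model_name"] == model_name:
            return deployment["litellm_params"]
    raise KeyError(f"no deployment named {model_name!r}")

assert resolve("CodeLlama-13b-Instruct")["model"].startswith("together_ai/")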
@@ -3990,6 +3990,26 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


+def test_async_text_completion_together_ai():
+    litellm.set_verbose = True
+    print("test_async_text_completion")
+
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
+                prompt="good morning",
+                max_tokens=10,
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            pytest.fail("An unexpected error occurred")
+
+    asyncio.run(test_get_response())
+
+
 # test_async_text_completion()
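The new test can be selected by name with pytest's -k filter (for example, pytest -k test_async_text_completion_together_ai on whichever test module this hunk belongs to; the diff does not name the file). It makes a live call, so it assumes a Together AI API key in the environment, typically TOGETHERAI_API_KEY for litellm.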