forked from phoenix/litellm-mirror
fix(main.py): fix together ai text completion call
parent 59080431b8
commit a854824c02

3 changed files with 29 additions and 1 deletion
@@ -2950,7 +2950,9 @@ def embedding(
                 model=model,  # type: ignore
                 llm_provider="ollama",  # type: ignore
             )
-        ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        ollama_embeddings_fn = (
+            ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
+        )
         response = ollama_embeddings_fn(
             api_base=api_base,
             model=model,
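The hunk above only reflows the sync/async function selection onto multiple lines; behaviour is unchanged. For context, a minimal sketch of the code path it sits in, calling litellm's embedding entry point against a local Ollama server (the model name and a running Ollama instance are assumptions, not part of this commit):

import litellm

# assumes a local Ollama server with an embedding-capable model already pulled
response = litellm.embedding(
    model="ollama/nomic-embed-text",
    input=["good morning from litellm"],
)
print(response)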
@@ -3094,6 +3096,7 @@ async def atext_completion(*args, **kwargs):
             or custom_llm_provider == "huggingface"
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "vertex_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure and openai, soon all.
             # Await normally
             response = await loop.run_in_executor(None, func_with_context)
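The new condition lets any provider registered as OpenAI-compatible take the awaited path in atext_completion instead of the synchronous fallback. A quick, illustrative check of what that membership test covers at runtime (the exact contents of the list depend on the installed litellm version):

import litellm

print(litellm.openai_compatible_providers)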
@@ -3124,6 +3127,8 @@ async def atext_completion(*args, **kwargs):
         ## TRANSLATE CHAT TO TEXT FORMAT ##
         if isinstance(response, TextCompletionResponse):
             return response
+        elif asyncio.iscoroutine(response):
+            response = await response

         text_completion_response = TextCompletionResponse()
         text_completion_response["id"] = response.get("id", None)
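The new elif covers the case where atext_completion gets back an un-awaited coroutine instead of a finished TextCompletionResponse, so it has to be resolved before its fields are read. A standalone sketch of that guard (illustrative names, not litellm code):

import asyncio

async def provider_call():
    # stands in for a provider path that returns a coroutine
    return {"id": "cmpl-123", "choices": []}

async def handle(response):
    if asyncio.iscoroutine(response):
        response = await response  # resolve it before reading fields
    return response.get("id", None)

print(asyncio.run(handle(provider_call())))  # -> cmpl-123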
@@ -17,6 +17,9 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - litellm_params:
+      model: together_ai/codellama/CodeLlama-13b-Instruct-hf
+    model_name: CodeLlama-13b-Instruct
 router_settings:
   num_retries: 0
   enable_pre_call_checks: true
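With a litellm proxy started from the config above, the new model_name alias can be exercised through any OpenAI-compatible client. A hedged sketch (the proxy URL, port, and client key are placeholders, and the proxy itself needs TOGETHERAI_API_KEY set):

import openai

client = openai.OpenAI(base_url="http://0.0.0.0:8000", api_key="sk-anything")
completion = client.completions.create(
    model="CodeLlama-13b-Instruct",
    prompt="good morning",
    max_tokens=10,
)
print(completion)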
@@ -3990,6 +3990,26 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


+def test_async_text_completion_together_ai():
+    litellm.set_verbose = True
+    print("test_async_text_completion")
+
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
+                prompt="good morning",
+                max_tokens=10,
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            pytest.fail("An unexpected error occurred")
+
+    asyncio.run(test_get_response())
+
+
 # test_async_text_completion()
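The new test calls the live Together AI API, so it needs TOGETHERAI_API_KEY in the environment. One way to run just this test from the repository's test directory (the invocation below is a suggestion, not part of the commit):

import pytest

pytest.main(["-k", "test_async_text_completion_together_ai", "-s"])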