fix(ollama.py): fix async completion calls for ollama

Krrish Dholakia 2023-12-13 13:10:25 -08:00
parent 52375e0377
commit 7b8851cce5
7 changed files with 35 additions and 17 deletions


@@ -1004,6 +1004,7 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
     ### ROUTE THE REQUEST ###
     router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else []
     if llm_router is not None and data["model"] in router_model_names: # model in router model list
+        print(f"ENTERS LLM ROUTER ACOMPLETION")
         response = await llm_router.acompletion(**data)
     elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
         response = await llm_router.acompletion(**data, specific_deployment = True)
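For context, here is a minimal, self-contained sketch of the routing pattern shown in the hunk above. StubRouter and route_request are hypothetical stand-ins invented for illustration; they are not litellm.Router's actual API. The real proxy routes to the router's model-name list first, then falls back to a named deployment, as this sketch mirrors:

import asyncio
from typing import Any, Dict, List, Optional

class StubRouter:
    # Hypothetical stand-in for a router object exposing model names,
    # deployment names, and an async completion call.
    def __init__(self, model_names: List[str], deployment_names: List[str]):
        self.model_names = model_names
        self.deployment_names = deployment_names

    async def acompletion(self, specific_deployment: bool = False, **data: Any) -> Dict[str, Any]:
        # A real router would pick a deployment and await the provider call;
        # here we just echo back how the request was routed.
        return {"model": data["model"], "specific_deployment": specific_deployment}

async def route_request(llm_router: Optional[StubRouter], data: Dict[str, Any]) -> Dict[str, Any]:
    # Mirrors the routing order in the hunk above: check the router's
    # model-name list first, then specific deployments.
    if llm_router is not None and data["model"] in llm_router.model_names:
        return await llm_router.acompletion(**data)
    if llm_router is not None and data["model"] in llm_router.deployment_names:
        return await llm_router.acompletion(**data, specific_deployment=True)
    raise ValueError(f"no route for model {data['model']!r}")

if __name__ == "__main__":
    router = StubRouter(model_names=["ollama/llama2"], deployment_names=["ollama/llama2-gpu"])
    result = asyncio.run(route_request(router, {"model": "ollama/llama2", "messages": []}))
    print(result)

The key point of the awaited acompletion call is that the proxy endpoint is itself async, so the router's completion must be awaited rather than called synchronously; the commit title suggests the ollama path previously broke this contract.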