diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 8b6fae40f2..12d0428ba4 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3107,6 +3107,12 @@ async def completion(
             response = await llm_router.atext_completion(
                 **data, specific_deployment=True
             )
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            response = await llm_router.atext_completion(**data)
         elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.atext_completion(**data)
         else:
@@ -3324,6 +3330,12 @@ async def chat_completion(
             llm_router is not None and data["model"] in llm_router.deployment_names
         ):  # model in router deployments, calling a specific deployment on the router
             tasks.append(llm_router.acompletion(**data, specific_deployment=True))
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            tasks.append(llm_router.acompletion(**data))
         elif user_model is not None:  # `litellm --model <your-model-name>`
             tasks.append(litellm.acompletion(**data))
         else:
@@ -3538,6 +3550,12 @@ async def embeddings(
             llm_router is not None and data["model"] in llm_router.deployment_names
         ):  # model in router deployments, calling a specific deployment on the router
             response = await llm_router.aembedding(**data, specific_deployment=True)
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            response = await llm_router.aembedding(**data)
         elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.aembedding(**data)
         else:
@@ -3683,6 +3701,12 @@ async def image_generation(
             response = await llm_router.aimage_generation(
                 **data
             )  # ensure this goes the llm_router, router will do the correct alias mapping
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            response = await llm_router.aimage_generation(**data)
         elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.aimage_generation(**data)
         else:
@@ -3837,6 +3861,12 @@ async def audio_transcriptions(
             response = await llm_router.atranscription(
                 **data
             )  # ensure this goes the llm_router, router will do the correct alias mapping
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            response = await llm_router.atranscription(**data)
         elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.atranscription(**data)
         else:
@@ -3990,6 +4020,12 @@ async def moderations(
             response = await llm_router.amoderation(
                 **data
             )  # ensure this goes the llm_router, router will do the correct alias mapping
+        elif (
+            llm_router is not None
+            and data["model"] not in router_model_names
+            and llm_router.default_deployment is not None
+        ):  # model not in router model list; route to the router's default deployment
+            response = await llm_router.amoderation(**data)
         elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.amoderation(**data)
         else: