(fix) router - allow users to explicitly call a specific deployment

ishaan-jaff 2023-12-05 21:55:07 -08:00
parent acef6bd58d
commit 8e6c4c5310
2 changed files with 27 additions and 16 deletions

litellm/proxy/proxy_server.py

@@ -861,7 +861,7 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
         if llm_router is not None and data["model"] in router_model_names: # model in router model list
             response = await llm_router.acompletion(**data)
         elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
-            response = await llm_router.acompletion(**data)
+            response = await llm_router.acompletion(**data, specific_deployment = True)
         else: # router is not set
             response = await litellm.acompletion(**data)
         if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
@@ -923,7 +923,7 @@ async def embeddings(request: Request, user_api_key_dict: UserAPIKeyAuth = Depen
         if llm_router is not None and data["model"] in router_model_names: # model in router model list
             response = await llm_router.aembedding(**data)
         elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
-            response = await llm_router.aembedding(**data)
+            response = await llm_router.aembedding(**data, specific_deployment = True)
         else:
             response = await litellm.aembedding(**data)
         background_tasks.add_task(log_input_output, request, response) # background task for logging to OTEL

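For context on the proxy-side change above, here is a rough usage sketch (not part of the commit; the proxy URL, API key, and deployment name are placeholder assumptions). Because a deployment name now falls into the `deployment_names` branch with `specific_deployment=True`, sending that name as `model` pins the request to that exact deployment:

    import openai

    # assumes a LiteLLM proxy running locally; URL, key, and deployment name are placeholders
    client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:8000")

    # "model" is a deployment name from config.yaml (not a model-group name),
    # so the proxy calls llm_router.acompletion(..., specific_deployment=True)
    response = client.chat.completions.create(
        model="azure/chat-gpt-v-2",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(response.choices[0].message.content)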
litellm/router.py

@@ -184,7 +184,7 @@ class Router:
         try:
             # pick the one that is available (lowest TPM/RPM)
-            deployment = self.get_available_deployment(model=model, messages=messages)
+            deployment = self.get_available_deployment(model=model, messages=messages, specific_deployment=kwargs.pop("specific_deployment", None))
             kwargs.setdefault("metadata", {}).update({"deployment": deployment["litellm_params"]["model"]})
             data = deployment["litellm_params"].copy()
             for k, v in self.default_litellm_params.items():
@@ -232,7 +232,7 @@ class Router:
         try:
             self.print_verbose(f"Inside _acompletion()- model: {model}; kwargs: {kwargs}")
             original_model_string = None # set a default for this variable
-            deployment = self.get_available_deployment(model=model, messages=messages)
+            deployment = self.get_available_deployment(model=model, messages=messages, specific_deployment=kwargs.pop("specific_deployment", None))
             kwargs.setdefault("metadata", {}).update({"deployment": deployment["litellm_params"]["model"]})
             data = deployment["litellm_params"].copy()
             for k, v in self.default_litellm_params.items():
@@ -268,7 +268,7 @@ class Router:
             kwargs.setdefault("metadata", {}).update({"model_group": model})
             messages=[{"role": "user", "content": prompt}]
             # pick the one that is available (lowest TPM/RPM)
-            deployment = self.get_available_deployment(model=model, messages=messages)
+            deployment = self.get_available_deployment(model=model, messages=messages, specific_deployment=kwargs.pop("specific_deployment", None))
             data = deployment["litellm_params"].copy()
             for k, v in self.default_litellm_params.items():
@@ -301,7 +301,7 @@ class Router:
                   is_async: Optional[bool] = False,
                   **kwargs) -> Union[List[float], None]:
         # pick the one that is available (lowest TPM/RPM)
-        deployment = self.get_available_deployment(model=model, input=input)
+        deployment = self.get_available_deployment(model=model, input=input, specific_deployment=kwargs.pop("specific_deployment", None))
         kwargs.setdefault("metadata", {}).update({"deployment": deployment["litellm_params"]["model"]})
         data = deployment["litellm_params"].copy()
         for k, v in self.default_litellm_params.items():
@@ -326,7 +326,7 @@ class Router:
                          is_async: Optional[bool] = True,
                          **kwargs) -> Union[List[float], None]:
         # pick the one that is available (lowest TPM/RPM)
-        deployment = self.get_available_deployment(model=model, input=input)
+        deployment = self.get_available_deployment(model=model, input=input, specific_deployment=kwargs.pop("specific_deployment", None))
         kwargs.setdefault("metadata", {}).update({"deployment": deployment["litellm_params"]["model"]})
         data = deployment["litellm_params"].copy()
         for k, v in self.default_litellm_params.items():
@@ -358,7 +358,7 @@ class Router:
             self.print_verbose(f'Async Response: {response}')
             return response
         except Exception as e:
-            self.print_verbose(f"An exception occurs")
+            self.print_verbose(f"An exception occurs: {e}")
             original_exception = e
             try:
                 self.print_verbose(f"Trying to fallback b/w models")
@@ -1031,6 +1031,8 @@ class Router:
                 model_id+= str(model["litellm_params"][key])
             model["litellm_params"]["model"] += "-ModelID-" + model_id
+        self.print_verbose(f"\n Initialized Model List {self.model_list}")
+
         ############ Users can either pass tpm/rpm as a litellm_param or a router param ###########
         # for get_available_deployment, we use the litellm_param["rpm"]
         # in this snippet we also set rpm to be a litellm_param
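Side note on the `-ModelID-` suffix set in the hunk above: because `set_model_list` rewrites every deployment's `litellm_params["model"]` to carry that suffix, matching a user-supplied deployment name requires stripping it first, which is what the `litellm.utils.remove_model_id` call in the next hunk is for. The helper below is only an illustration of that idea, not litellm's actual implementation:

    # illustration only - mimics the effect of stripping the "-ModelID-" suffix;
    # the real code calls litellm.utils.remove_model_id() instead
    def strip_model_id(model: str) -> str:
        return model.split("-ModelID-")[0]

    assert strip_model_id("azure/chat-gpt-v-2-ModelID-1234") == "azure/chat-gpt-v-2"
    assert strip_model_id("gpt-3.5-turbo") == "gpt-3.5-turbo"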
@@ -1074,16 +1076,32 @@ class Router:
     def get_available_deployment(self,
                                  model: str,
                                  messages: Optional[List[Dict[str, str]]] = None,
-                                 input: Optional[Union[str, List]] = None):
+                                 input: Optional[Union[str, List]] = None,
+                                 specific_deployment: Optional[bool] = False
+                                 ):
         """
         Returns the deployment based on routing strategy
         """
+        # users need to explicitly call a specific deployment by setting `specific_deployment = True` as a completion()/embedding() kwarg
+        # when this was not explicit, we had several issues with fallbacks timing out
+        if specific_deployment == True:
+            # the user passed a deployment name (e.g. azure/chat-gpt-v-2 from their config.yaml) instead of a model group
+            for deployment in self.model_list:
+                cleaned_model = litellm.utils.remove_model_id(deployment.get("litellm_params").get("model"))
+                if cleaned_model == model:
+                    # return the first deployment whose `model` matches the specified deployment name
+                    return deployment
+            raise ValueError(f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}")
+
         ## get healthy deployments
         ### get all deployments
         ### filter out the deployments currently cooling down
         healthy_deployments = [m for m in self.model_list if m["model_name"] == model]
         if len(healthy_deployments) == 0:
             # check if the user sent in a deployment name instead
             healthy_deployments = [m for m in self.model_list if m["litellm_params"]["model"] == model]
         self.print_verbose(f"initial list of deployments: {healthy_deployments}")
         deployments_to_remove = []
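A minimal sketch of how a caller would use the new parameter directly on the Router (deployment names, keys, and endpoints below are placeholders, not taken from the commit): passing a deployment name as `model` together with `specific_deployment=True` makes `get_available_deployment` return that exact deployment, or raise the ValueError added above if no cleaned model name matches.

    from litellm import Router

    # placeholder config - two deployments in the same "gpt-3.5-turbo" model group
    router = Router(model_list=[
        {"model_name": "gpt-3.5-turbo",
         "litellm_params": {"model": "azure/chat-gpt-v-2",
                            "api_key": "azure-key",
                            "api_base": "https://example-endpoint.openai.azure.com",
                            "api_version": "2023-07-01-preview"}},
        {"model_name": "gpt-3.5-turbo",
         "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."}},
    ])

    # load-balanced call: the router picks an available "gpt-3.5-turbo" deployment
    router.completion(model="gpt-3.5-turbo",
                      messages=[{"role": "user", "content": "hi"}])

    # pinned call: `model` is the deployment name, and specific_deployment=True
    # short-circuits routing to that exact deployment (or raises ValueError)
    router.completion(model="azure/chat-gpt-v-2",
                      messages=[{"role": "user", "content": "hi"}],
                      specific_deployment=True)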
@@ -1099,13 +1117,6 @@ class Router:
                     healthy_deployments.remove(deployment)
         self.print_verbose(f"healthy deployments: length {len(healthy_deployments)} {healthy_deployments}")
         if len(healthy_deployments) == 0:
-            # users can also specify a specific deployment name. At this point we should check if they are just trying to call a specific deployment
-            for deployment in self.model_list:
-                cleaned_model = litellm.utils.remove_model_id(deployment.get("litellm_params").get("model"))
-                if cleaned_model == model:
-                    # User Passed a specific deployment name on their config.yaml, example azure/chat-gpt-v-2
-                    # return the first deployment where the `model` matches the specificed deployment name
-                    return deployment
             raise ValueError("No models available")
         if litellm.model_alias_map and model in litellm.model_alias_map:
             model = litellm.model_alias_map[