Merge pull request #3569 from BerriAI/litellm_fix_bug_upsert_deployments
[Fix] Upsert deployment bug
commit e3848abdfe
3 changed files with 35 additions and 25 deletions
@@ -198,6 +198,7 @@ jobs:
    -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
    -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
    -e AWS_REGION_NAME=$AWS_REGION_NAME \
+   -e AUTO_INFER_REGION=True \
    -e OPENAI_API_KEY=$OPENAI_API_KEY \
    -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \
    -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \
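The new `-e AUTO_INFER_REGION=True` line exports the feature flag into the test container. Worth noting: environment variables always arrive in the process as strings, which is why the router code further down compares against "True" as well as True. A minimal sketch of how the flag is read (variable names match the diff):

import os

# `-e AUTO_INFER_REGION=True` makes os.environ.get return the string "True",
# never the boolean True; the default False is only hit when the var is unset.
_auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
if _auto_infer_region == "True":
    print("auto region inference enabled")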
@@ -2558,20 +2558,27 @@ class Router:
         self.set_client(model=deployment.to_json(exclude_none=True))

         # set region (if azure model)
-        try:
-            if "azure" in deployment.litellm_params.model:
-                region = litellm.utils.get_model_region(
-                    litellm_params=deployment.litellm_params, mode=None
-                )
-
-                deployment.litellm_params.region_name = region
-        except Exception as e:
-            verbose_router_logger.error(
-                "Unable to get the region for azure model - {}, {}".format(
-                    deployment.litellm_params.model, str(e)
-                )
-            )
-            pass # [NON-BLOCKING]
+        _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
+        if _auto_infer_region == True or _auto_infer_region == "True":
+            print("Auto inferring region") # noqa
+            """
+            Hiding behind a feature flag
+            When there is a large amount of LLM deployments this makes startup times blow up
+            """
+            try:
+                if "azure" in deployment.litellm_params.model:
+                    region = litellm.utils.get_model_region(
+                        litellm_params=deployment.litellm_params, mode=None
+                    )
+
+                    deployment.litellm_params.region_name = region
+            except Exception as e:
+                verbose_router_logger.error(
+                    "Unable to get the region for azure model - {}, {}".format(
+                        deployment.litellm_params.model, str(e)
+                    )
+                )
+                pass # [NON-BLOCKING]

         return deployment
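The docstring added in this hunk explains the motivation: region lookup runs once per deployment at startup, so with many deployments startup time blows up. An illustrative sketch of that cost, with a mock lookup standing in for litellm.utils.get_model_region (the timings and names here are invented for illustration, not from the PR):

import time

def mock_get_model_region(litellm_params: dict) -> str:
    # stand-in for litellm.utils.get_model_region, which may do network I/O
    time.sleep(0.1)
    return "eastus"

def startup(deployments: list, auto_infer_region: bool) -> None:
    # mirrors the gated loop in the hunk above
    for d in deployments:
        if auto_infer_region and "azure" in d["model"]:
            d["region_name"] = mock_get_model_region(d)

deployments = [{"model": "azure/gpt-35-turbo"} for _ in range(100)]

start = time.time()
startup(deployments, auto_infer_region=False)
print(f"flag off: {time.time() - start:.2f}s")  # ~0.00s

start = time.time()
startup(deployments, auto_infer_region=True)
print(f"flag on:  {time.time() - start:.2f}s")  # ~10s at 0.1s per lookup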
@@ -2600,7 +2607,7 @@ class Router:
         self.model_names.append(deployment.model_name)
         return deployment

-    def upsert_deployment(self, deployment: Deployment) -> Deployment:
+    def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
         """
         Add or update deployment
         Parameters:
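Widening the return type to Optional[Deployment] lets upsert_deployment signal "nothing to do" distinctly from "added/updated". A toy sketch of the same contract on a plain dict (purely illustrative, not litellm code):

from typing import Optional

def upsert(store: dict, key: str, value: str) -> Optional[str]:
    # return None when the stored value already matches (a no-op),
    # otherwise write and return the new value
    if store.get(key) == value:
        return None
    store[key] = value
    return value

store: dict = {}
print(upsert(store, "gpt-3.5-turbo", "azure/gpt-35-turbo"))  # added -> value
print(upsert(store, "gpt-3.5-turbo", "azure/gpt-35-turbo"))  # unchanged -> None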
@@ -2610,8 +2617,17 @@ class Router:
         - The added/updated deployment
         """
         # check if deployment already exists
-        if deployment.model_info.id in self.get_model_ids():
+        _deployment_model_id = deployment.model_info.id or ""
+        _deployment_on_router: Optional[Deployment] = self.get_deployment(
+            model_id=_deployment_model_id
+        )
+        if _deployment_on_router is not None:
+            # deployment with this model_id exists on the router
+            if deployment.litellm_params == _deployment_on_router.litellm_params:
+                # No need to update
+                return None
+
+            # if there is a new litellm param -> then update the deployment
             # remove the previous deployment
             removal_idx: Optional[int] = None
             for idx, model in enumerate(self.model_list):
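The early return hinges on == between two litellm_params objects. Assuming LiteLLM_Params is a pydantic model (as in litellm), == compares field values rather than object identity, so two deployments with identical params compare equal even when constructed separately. A simplified sketch:

from typing import Optional
from pydantic import BaseModel

class LiteLLM_Params(BaseModel):
    # simplified stand-in for litellm's class; the real one has many more fields
    model: str
    api_key: Optional[str] = None

a = LiteLLM_Params(model="azure/gpt-35-turbo")
b = LiteLLM_Params(model="azure/gpt-35-turbo")
print(a == b)  # True  -> upsert_deployment returns None, router untouched

b = LiteLLM_Params(model="azure/gpt-35-turbo", api_key="sk-new")
print(a == b)  # False -> the stale entry is popped so the update takes effect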
@@ -2620,16 +2636,9 @@ class Router:

             if removal_idx is not None:
                 self.model_list.pop(removal_idx)
-
-        # add to model list
-        _deployment = deployment.to_json(exclude_none=True)
-        self.model_list.append(_deployment)
-
-        # initialize client
-        self._add_deployment(deployment=deployment)
-
-        # add to model names
-        self.model_names.append(deployment.model_name)
+        else:
+            # if the model_id is not in router
+            self.add_deployment(deployment=deployment)

         return deployment

     def delete_deployment(self, id: str) -> Optional[Deployment]:
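Taken together, the new flow is: same model_id with identical litellm_params -> return None; same model_id with changed params -> pop the stale entry; unknown model_id -> delegate to add_deployment. A usage sketch; the import paths and constructor shape are assumed from litellm's layout around this PR, and all model names, keys, and endpoints are placeholders:

from litellm import Router
from litellm.types.router import Deployment, LiteLLM_Params  # assumed paths

router = Router(model_list=[])

d = Deployment(
    model_name="gpt-3.5-turbo",
    litellm_params=LiteLLM_Params(
        model="azure/gpt-35-turbo",
        api_key="sk-placeholder",  # placeholder credential
        api_base="https://example.openai.azure.com",  # placeholder endpoint
    ),
)

print(router.upsert_deployment(deployment=d))  # unknown id -> added
print(router.upsert_deployment(deployment=d))  # identical params -> None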
@@ -1141,7 +1141,7 @@ def test_get_hf_task_for_model():

 # test_get_hf_task_for_model()
-# litellm.set_verbose=False
+# litellm.set_verbose = False
 # ################### Hugging Face TGI models ########################
 # # TGI model
 # # this is a TGI model https://huggingface.co/glaiveai/glaive-coder-7b