From e4f302a8e29a5f3d23a56806e259cd2cfbc6af07 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 25 Nov 2023 18:28:39 -0800
Subject: [PATCH] fix(proxy_server.py): expose a /health endpoint

---
 litellm/__init__.py           |  2 ++
 litellm/proxy/proxy_server.py | 27 ++++++++++++++++++++++-----
 litellm/router.py             |  9 ++-------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index d08cdf153c..99d7455c53 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -340,6 +340,8 @@ cohere_embedding_models: List = [
 ]
 bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]
 
+all_embedding_models = open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
+
 from .timeout import timeout
 from .utils import (
     client,
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e43534c205..415a902345 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -830,11 +830,6 @@ async def info_key_fn(key: str = fastapi.Query(..., description="Key in the requ
             detail={"error": str(e)},
         )
 
-
-@router.get("/test")
-async def test_endpoint(request: Request):
-    return {"route": request.url.path}
-
 #### EXPERIMENTAL QUEUING ####
 @router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
 async def async_queue_request(request: Request):
@@ -882,6 +877,28 @@ async def retrieve_server_log(request: Request):
     filepath = os.path.expanduser("~/.ollama/logs/server.log")
     return FileResponse(filepath)
 
+#### BASIC ENDPOINTS ####
+
+@router.get("/test")
+async def test_endpoint(request: Request):
+    return {"route": request.url.path}
+
+@app.get("/health", description="Check the health of all the endpoints in config.yaml", tags=["health"])
+async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query(None, description="Specify the model name (optional)")):
+    global llm_model_list
+    healthy_endpoints = []
+    if llm_model_list:
+        for model_name in llm_model_list:
+            try:
+                if model is None or model == model_name["litellm_params"]["model"]: # if model specified, just call that one.
+                    litellm_params = model_name["litellm_params"]
+                    if litellm_params["model"] not in litellm.all_embedding_models: # filter out embedding models
+                        litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
+                        litellm.completion(**litellm_params)
+                        healthy_endpoints.append(litellm_params["model"])
+            except:
+                pass
+    return {"healthy_endpoints": healthy_endpoints}
 
 @router.get("/")
 async def home(request: Request):
diff --git a/litellm/router.py b/litellm/router.py
index b4a21ccb5d..1ec581a0b0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -301,9 +301,7 @@ class Router:
                             return response
                         except Exception as e:
                             pass
-                else:
-                    if self.fallbacks is None:
-                        raise original_exception
+                elif self.fallbacks is not None:
                     self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
                     for item in self.fallbacks:
                         if list(item.keys())[0] == model_group:
@@ -398,10 +396,7 @@ class Router:
                             return response
                         except Exception as e:
                             pass
-                else:
-                    if self.fallbacks is None:
-                        raise original_exception
-
+                elif self.fallbacks is not None:
                     self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
                     fallback_model_group = None
                     for item in self.fallbacks:
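
For reference, a minimal sketch of how the new endpoint could be exercised once the proxy is running. The base URL and port below are assumptions (adjust them to your deployment) and are not part of the patch; the optional model query parameter mirrors the signature added above. Note that a deployment whose test completion raises is skipped silently by the bare except, so it simply does not appear in healthy_endpoints.

# Hypothetical client-side check of the new /health endpoint.
# Assumes the LiteLLM proxy is reachable at http://0.0.0.0:8000 -- adjust as needed.
import requests

BASE_URL = "http://0.0.0.0:8000"  # assumed proxy address, not part of the patch

# Check every non-embedding model defined in config.yaml
resp = requests.get(f"{BASE_URL}/health")
print(resp.json())  # e.g. {"healthy_endpoints": ["gpt-3.5-turbo", ...]}

# Limit the check to a single model via the optional query parameter
resp = requests.get(f"{BASE_URL}/health", params={"model": "gpt-3.5-turbo"})
print(resp.json())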