Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 10:14:26 +00:00
fix(proxy_server.py): expose a /health endpoint
This commit is contained in:
parent 2c4f52b6b1
commit e4f302a8e2
3 changed files with 26 additions and 12 deletions
@@ -340,6 +340,8 @@ cohere_embedding_models: List = [
]
bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]

all_embedding_models = open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models

from .timeout import timeout
from .utils import (
    client,
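The aggregated all_embedding_models list lets callers tell embedding models apart from chat models with a single membership test; a minimal sketch of that check (the model id is just an illustrative value):

import litellm

model = "text-embedding-ada-002"  # illustrative model id
if model in litellm.all_embedding_models:
    print(f"{model} is an embedding model; skip completion-style calls")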
@@ -830,11 +830,6 @@ async def info_key_fn(key: str = fastapi.Query(..., description="Key in the requ
            detail={"error": str(e)},
        )

@router.get("/test")
async def test_endpoint(request: Request):
    return {"route": request.url.path}

#### EXPERIMENTAL QUEUING ####
@router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
async def async_queue_request(request: Request):
@@ -882,6 +877,28 @@ async def retrieve_server_log(request: Request):
    filepath = os.path.expanduser("~/.ollama/logs/server.log")
    return FileResponse(filepath)

#### BASIC ENDPOINTS ####

@router.get("/test")
async def test_endpoint(request: Request):
    return {"route": request.url.path}

@app.get("/health", description="Check the health of all the endpoints in config.yaml", tags=["health"])
async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query(None, description="Specify the model name (optional)")):
    global llm_model_list
    healthy_endpoints = []
    if llm_model_list:
        for model_name in llm_model_list:
            try:
                if model is None or model == model_name["litellm_params"]["model"]:  # if model specified, just call that one.
                    litellm_params = model_name["litellm_params"]
                    if litellm_params["model"] not in litellm.all_embedding_models:  # filter out embedding models
                        litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
                        litellm.completion(**litellm_params)
                        healthy_endpoints.append(litellm_params["model"])
            except:
                pass
    return {"healthy_endpoints": healthy_endpoints}

@router.get("/")
async def home(request: Request):
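With the proxy running, the new route can be exercised like any other endpoint; a minimal sketch using requests (the base URL, port, and model name are assumptions for illustration, not part of the commit):

import requests

# check every non-embedding model configured in config.yaml
resp = requests.get("http://0.0.0.0:8000/health")
print(resp.json())  # e.g. {"healthy_endpoints": [...]}

# or limit the check to a single model via the optional query parameter
resp = requests.get("http://0.0.0.0:8000/health", params={"model": "gpt-3.5-turbo"})
print(resp.json())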
@@ -301,9 +301,7 @@ class Router:
                        return response
                    except Exception as e:
                        pass
            else:
                if self.fallbacks is None:
                    raise original_exception
                elif self.fallbacks is not None:
                    self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
                    for item in self.fallbacks:
                        if list(item.keys())[0] == model_group:
@@ -398,10 +396,7 @@ class Router:
                        return response
                    except Exception as e:
                        pass
            else:
                if self.fallbacks is None:
                    raise original_exception

                elif self.fallbacks is not None:
                    self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
                    fallback_model_group = None
                    for item in self.fallbacks:
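Both Router hunks sit in the fallback path, where list(item.keys())[0] == model_group looks up the fallback groups for the failing deployment. A minimal sketch of a matching fallbacks configuration (the model names and API keys are placeholders, not taken from the commit):

from litellm import Router

model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."}},  # placeholder key
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4", "api_key": "sk-..."}},  # placeholder key
]

# each fallbacks entry maps a model group to the groups to try when it raises,
# which is what the lookup in the hunks above iterates over
router = Router(model_list=model_list, fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}])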