forked from phoenix/litellm-mirror
fix(proxy_server.py): expose a /health endpoint
This commit is contained in:
parent
2c4f52b6b1
commit
e4f302a8e2
3 changed files with 26 additions and 12 deletions
|
@@ -340,6 +340,8 @@ cohere_embedding_models: List = [
|
||||||
]
|
]
|
||||||
bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]
|
bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]
|
||||||
|
|
||||||
|
all_embedding_models = open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
|
||||||
|
|
||||||
from .timeout import timeout
|
from .timeout import timeout
|
||||||
from .utils import (
|
from .utils import (
|
||||||
client,
|
client,
|
||||||
|
|
|
@@ -830,11 +830,6 @@ async def info_key_fn(key: str = fastapi.Query(..., description="Key in the requ
|
||||||
detail={"error": str(e)},
|
detail={"error": str(e)},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/test")
|
|
||||||
async def test_endpoint(request: Request):
|
|
||||||
return {"route": request.url.path}
|
|
||||||
|
|
||||||
#### EXPERIMENTAL QUEUING ####
|
#### EXPERIMENTAL QUEUING ####
|
||||||
@router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
|
@router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
|
||||||
async def async_queue_request(request: Request):
|
async def async_queue_request(request: Request):
|
||||||
|
@@ -882,6 +877,28 @@ async def retrieve_server_log(request: Request):
|
||||||
filepath = os.path.expanduser("~/.ollama/logs/server.log")
|
filepath = os.path.expanduser("~/.ollama/logs/server.log")
|
||||||
return FileResponse(filepath)
|
return FileResponse(filepath)
|
||||||
|
|
||||||
|
#### BASIC ENDPOINTS ####
|
||||||
|
|
||||||
|
@router.get("/test")
|
||||||
|
async def test_endpoint(request: Request):
|
||||||
|
return {"route": request.url.path}
|
||||||
|
|
||||||
|
@app.get("/health", description="Check the health of all the endpoints in config.yaml", tags=["health"])
|
||||||
|
async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query(None, description="Specify the model name (optional)")):
|
||||||
|
global llm_model_list
|
||||||
|
healthy_endpoints = []
|
||||||
|
if llm_model_list:
|
||||||
|
for model_name in llm_model_list:
|
||||||
|
try:
|
||||||
|
if model is None or model == model_name["litellm_params"]["model"]: # if model specified, just call that one.
|
||||||
|
litellm_params = model_name["litellm_params"]
|
||||||
|
if litellm_params["model"] not in litellm.all_embedding_models: # filter out embedding models
|
||||||
|
litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
|
litellm.completion(**litellm_params)
|
||||||
|
healthy_endpoints.append(litellm_params["model"])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return {"healthy_endpoints": healthy_endpoints}
|
||||||
|
|
||||||
@router.get("/")
|
@router.get("/")
|
||||||
async def home(request: Request):
|
async def home(request: Request):
|
||||||
|
|
|
@@ -301,9 +301,7 @@ class Router:
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
else:
|
elif self.fallbacks is not None:
|
||||||
if self.fallbacks is None:
|
|
||||||
raise original_exception
|
|
||||||
self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
|
self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
|
||||||
for item in self.fallbacks:
|
for item in self.fallbacks:
|
||||||
if list(item.keys())[0] == model_group:
|
if list(item.keys())[0] == model_group:
|
||||||
|
@@ -398,10 +396,7 @@ class Router:
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
else:
|
elif self.fallbacks is not None:
|
||||||
if self.fallbacks is None:
|
|
||||||
raise original_exception
|
|
||||||
|
|
||||||
self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
|
self.print_verbose(f"inside model fallbacks: {self.fallbacks}")
|
||||||
fallback_model_group = None
|
fallback_model_group = None
|
||||||
for item in self.fallbacks:
|
for item in self.fallbacks:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue