From 9747cc5aade054944ed7b52dc40a539bd06fddd7 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 27 Nov 2023 12:12:52 -0800
Subject: [PATCH] (feat) --health for checking config models

---
 litellm/proxy/proxy_cli.py    |  9 ++++++++-
 litellm/proxy/proxy_server.py | 17 +++++++++++++++--
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index b518675dd8..a76a49b2cc 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -78,11 +78,12 @@ def is_port_in_use(port):
 @click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--logs', flag_value=False, type=int, help='Gets the "n" most recent logs. By default gets most recent log.')
+@click.option('--health', flag_value=True, help='Make a chat/completions request to all llms in config.yaml')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--test_async', default=False, is_flag=True, help='Calls async endpoints /queue/requests and /queue/response')
 @click.option('--num_requests', default=10, type=int, help='Number of requests to hit async endpoint with')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, num_workers, test_async, num_requests, use_queue):
+def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, num_workers, test_async, num_requests, use_queue, health):
     global feature_telemetry
     args = locals()
     if local:
@@ -177,6 +178,12 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
         print(f"Successful Calls: {successful_calls}")
         print(f"Failed Calls: {failed_calls}")
         return
+    if health != False:
+        import requests
+        print("\nLiteLLM: Health Testing models in config")
+        response = requests.get(url=f"http://{host}:{port}/health")
+        print(json.dumps(response.json(), indent=4))
+        return
     if test != False:
         click.echo('\nLiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 6b363df3b9..fee09755cd 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -890,6 +890,7 @@ async def test_endpoint(request: Request):
 async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query(None, description="Specify the model name (optional)")):
     global llm_model_list
     healthy_endpoints = []
+    unhealthy_endpoints = []
     if llm_model_list:
         for model_name in llm_model_list:
             try:
@@ -898,10 +899,22 @@ async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query
                     if litellm_params["model"] not in litellm.all_embedding_models: # filter out embedding models
                         litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
                         litellm.completion(**litellm_params)
-                        healthy_endpoints.append(litellm_params["model"])
+                        cleaned_params = {}
+                        for key in litellm_params:
+                            if key != "api_key" and key != "messages":
+                                cleaned_params[key] = litellm_params[key]
+                        healthy_endpoints.append(cleaned_params)
             except:
+                cleaned_params = {}
+                for key in litellm_params:
+                    if key != "api_key" and key != "messages":
+                        cleaned_params[key] = litellm_params[key]
+                unhealthy_endpoints.append(cleaned_params)
                 pass
-    return {"healthy_endpoints": healthy_endpoints}
+    return {
+        "healthy_endpoints": healthy_endpoints,
+        "unhealthy_endpoints": unhealthy_endpoints
+    }
 
 @router.get("/")
 async def home(request: Request):
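Note (not part of the patch): a minimal sketch of exercising the new health check from Python, assuming a proxy started from a config.yaml is already running; PROXY_URL below is an assumption, adjust it to wherever the proxy is listening.

    # Hypothetical client-side check against the new /health endpoint.
    import json
    import requests

    PROXY_URL = "http://0.0.0.0:8000"  # assumption: change to your proxy's host/port

    response = requests.get(url=f"{PROXY_URL}/health")
    report = response.json()

    # Per the endpoint change above, the response has the shape
    # {"healthy_endpoints": [...], "unhealthy_endpoints": [...]},
    # where each entry is that model's litellm_params with "api_key" and "messages" stripped.
    print(json.dumps(report, indent=4))

The same report is what the new CLI flag prints, e.g. `litellm --health` run against an already-running proxy.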