forked from phoenix/litellm-mirror
(feat) --health for checking config models
parent 56bb39e52c
commit 9747cc5aad
2 changed files with 23 additions and 3 deletions
@@ -78,11 +78,12 @@ def is_port_in_use(port):
 @click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--logs', flag_value=False, type=int, help='Gets the "n" most recent logs. By default gets most recent log.')
+@click.option('--health', flag_value=True, help='Make a chat/completions request to all llms in config.yaml')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--test_async', default=False, is_flag=True, help='Calls async endpoints /queue/requests and /queue/response')
 @click.option('--num_requests', default=10, type=int, help='Number of requests to hit async endpoint with')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, num_workers, test_async, num_requests, use_queue):
+def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, num_workers, test_async, num_requests, use_queue, health):
     global feature_telemetry
     args = locals()
     if local:
@@ -177,6 +178,12 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
         print(f"Successful Calls: {successful_calls}")
         print(f"Failed Calls: {failed_calls}")
         return
+    if health != False:
+        import requests
+        print("\nLiteLLM: Health Testing models in config")
+        response = requests.get(url=f"http://{host}:{port}/health")
+        print(json.dumps(response.json(), indent=4))
+        return
     if test != False:
         click.echo('\nLiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
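For reference, a minimal sketch of what the new `--health` flag does on the client side, assuming the proxy is already running and reachable; the host/port values below are placeholders, not part of the commit:

```python
# Sketch of the client-side health check added above (not the commit's code verbatim).
# host and port are assumptions -- use whatever your proxy is actually listening on.
import json
import requests

host, port = "0.0.0.0", 8000  # placeholder values
response = requests.get(url=f"http://{host}:{port}/health")
print(json.dumps(response.json(), indent=4))
```

In other words, the flag simply hits the proxy's `GET /health` route (added in the second file below) and pretty-prints the JSON it returns.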
@@ -890,6 +890,7 @@ async def test_endpoint(request: Request):
 async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query(None, description="Specify the model name (optional)")):
     global llm_model_list
     healthy_endpoints = []
+    unhealthy_endpoints = []
     if llm_model_list:
         for model_name in llm_model_list:
             try:
@@ -898,10 +899,22 @@ async def health_endpoint(request: Request, model: Optional[str] = fastapi.Query
                 if litellm_params["model"] not in litellm.all_embedding_models: # filter out embedding models
                     litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
                     litellm.completion(**litellm_params)
-                    healthy_endpoints.append(litellm_params["model"])
+                    cleaned_params = {}
+                    for key in litellm_params:
+                        if key != "api_key" and key != "messages":
+                            cleaned_params[key] = litellm_params[key]
+                    healthy_endpoints.append(cleaned_params)
             except:
+                cleaned_params = {}
+                for key in litellm_params:
+                    if key != "api_key" and key != "messages":
+                        cleaned_params[key] = litellm_params[key]
+                unhealthy_endpoints.append(cleaned_params)
                 pass
-    return {"healthy_endpoints": healthy_endpoints}
+    return {
+        "healthy_endpoints": healthy_endpoints,
+        "unhealthy_endpoints": unhealthy_endpoints
+    }

 @router.get("/")
 async def home(request: Request):
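To make the healthy/unhealthy bucketing and key filtering easier to follow in isolation, here is a standalone sketch of the handler's loop. `probe` stands in for `litellm.completion`, and the entry format (a dict holding a `litellm_params` mapping) is an assumption for illustration, not the commit's code:

```python
# Illustrative restatement of the /health loop above; `probe` stands in for litellm.completion.
def check_models(llm_model_list, probe):
    healthy_endpoints, unhealthy_endpoints = [], []
    for entry in llm_model_list:
        litellm_params = dict(entry.get("litellm_params", {}))  # assumed entry shape
        # report every param except credentials and the probe message, as the handler does
        cleaned_params = {k: v for k, v in litellm_params.items() if k not in ("api_key", "messages")}
        try:
            # (the real handler also skips models listed in litellm.all_embedding_models)
            litellm_params["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
            probe(**litellm_params)  # raises on failure, like litellm.completion
            healthy_endpoints.append(cleaned_params)
        except Exception:
            unhealthy_endpoints.append(cleaned_params)
    return {"healthy_endpoints": healthy_endpoints, "unhealthy_endpoints": unhealthy_endpoints}
```

So a successful run might return something like `{"healthy_endpoints": [{"model": "gpt-3.5-turbo"}], "unhealthy_endpoints": []}` (illustrative values), with one credential-stripped parameter dict per model from config.yaml.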