From 7ee089b5ca1aa8d382cf74726395541ac972e1fd Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 30 Nov 2023 09:08:11 -0800
Subject: [PATCH] fix(proxy_server.py): provide an endpoint that gives
 model-specific info from proxy

---
 litellm/proxy/proxy_server.py | 49 +++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 67a60194a..2a539dcd7 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -790,6 +790,8 @@ async def embeddings(request: Request, user_api_key_dict: dict = Depends(user_ap
     except Exception as e:
         pass
 
+#### KEY MANAGEMENT ####
+
 @router.post("/key/generate", dependencies=[Depends(user_api_key_auth)])
 async def generate_key_fn(request: Request):
     data = await request.json()
@@ -849,6 +851,52 @@ async def info_key_fn(key: str = fastapi.Query(..., description="Key in the requ
             detail={"error": str(e)},
         )
 
+#### MODEL MANAGEMENT ####
+
+#### [BETA] - This is a beta endpoint, format might change based on user feedback. - https://github.com/BerriAI/litellm/issues/933
+@router.get("/model/info", description="Provides more info about each model in /models, including config.yaml descriptions", tags=["model management"], dependencies=[Depends(user_api_key_auth)])
+async def model_info(request: Request):
+    global llm_model_list, general_settings
+    all_models = []
+    for m in llm_model_list:
+        model_dict = {}
+        model_name = m["model_name"]
+        model_params = {}
+        for k, v in m["litellm_params"].items():
+            if k == "api_key":  # don't send the api key
+                continue
+
+            if k == "model":
+                ########## remove -ModelID-XXXX from model ##############
+                original_model_string = v
+                # Find the index of "ModelID" in the string
+                index_of_model_id = original_model_string.find("-ModelID")
+                # Remove everything after "-ModelID" if it exists
+                if index_of_model_id != -1:
+                    v = original_model_string[:index_of_model_id]
+                else:
+                    v = original_model_string
+
+            model_params[k] = v
+
+        model_dict["model_name"] = model_name
+        model_dict["model_params"] = model_params
+        all_models.append(model_dict)
+    # all_models = list(set([m["model_name"] for m in llm_model_list]))
+    print_verbose(f"all_models: {all_models}")
+    return dict(
+        data=[
+            {
+                "id": model,
+                "object": "model",
+                "created": 1677610602,
+                "owned_by": "openai",
+            }
+            for model in all_models
+        ],
+        object="list",
+    )
+    pass
 #### EXPERIMENTAL QUEUING ####
 @router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
 async def async_queue_request(request: Request):
@@ -896,6 +944,7 @@ async def retrieve_server_log(request: Request):
     filepath = os.path.expanduser("~/.ollama/logs/server.log")
     return FileResponse(filepath)
 
+
 #### BASIC ENDPOINTS ####
 
 @router.get("/test")
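
Reviewer note, not part of the patch: a minimal sketch of how the new /model/info
endpoint could be exercised against a running proxy. It assumes the proxy is
listening on http://0.0.0.0:8000 and that "sk-1234" is a key previously issued via
/key/generate; both values are placeholders, not defaults guaranteed by this change.

    # query the beta /model/info endpoint and print each configured model
    import requests

    resp = requests.get(
        "http://0.0.0.0:8000/model/info",              # placeholder proxy URL
        headers={"Authorization": "Bearer sk-1234"},   # placeholder proxy key
    )
    resp.raise_for_status()
    for entry in resp.json()["data"]:
        # per this patch, entry["id"] is a dict holding the config.yaml model_name
        # and its litellm_params (api_key removed, "-ModelID-..." suffix stripped)
        print(entry["id"]["model_name"], entry["id"]["model_params"])

The response reuses the shape of the OpenAI-style /models list (object="list", with
per-entry "object"/"created"/"owned_by" fields), so clients that already parse
/models output need only look inside the "id" field for the extra config details.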