diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index 03dd91731..c67302e0d 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -161,4 +161,47 @@ Example Response:
 
 ```json
 "I'm alive!"
+```
+
+## Advanced - Call specific models
+
+To check the health of specific models, here's how to call them:
+
+### 1. Get model id via `/model/info`
+
+```bash
+curl -X GET 'http://0.0.0.0:4000/v1/model/info' \
+--header 'Authorization: Bearer sk-1234'
+```
+
+**Expected Response**
+
+```bash
+{
+    "model_name": "bedrock-anthropic-claude-3",
+    "litellm_params": {
+        "model": "anthropic.claude-3-sonnet-20240229-v1:0"
+    },
+    "model_info": {
+        "id": "634b87c444.." # 👈 UNIQUE MODEL ID
+    }
+}
+```
+
+### 2. Call specific model via `/chat/completions`
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model": "634b87c444..", # 👈 UNIQUE MODEL ID
+    "messages": [
+        {
+        "role": "user",
+        "content": "ping"
+        }
+    ]
+}
+'
 ```
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 8217a210a..f0264a7e8 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3005,6 +3005,10 @@ async def chat_completion(
             llm_router is not None and data["model"] in router_model_names
         ):  # model in router model list
             tasks.append(llm_router.acompletion(**data))
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model id in router model list
+            tasks.append(llm_router.acompletion(**data))
         elif (
             llm_router is not None
             and llm_router.model_group_alias is not None
@@ -3256,6 +3260,10 @@ async def completion(
             llm_response = asyncio.create_task(
                 llm_router.atext_completion(**data, specific_deployment=True)
             )
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model id in router model list
+            llm_response = asyncio.create_task(llm_router.atext_completion(**data))
         elif (
             llm_router is not None
             and data["model"] not in router_model_names
@@ -3488,6 +3496,10 @@ async def embeddings(
             llm_router is not None and data["model"] in llm_router.deployment_names
         ):  # model in router deployments, calling a specific deployment on the router
             response = await llm_router.aembedding(**data, specific_deployment=True)
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model id in router model list, calling a specific deployment by its id
+            response = await llm_router.aembedding(**data)
         elif (
             llm_router is not None
             and data["model"] not in router_model_names
diff --git a/litellm/router.py b/litellm/router.py
index 638df2bf0..284cb3203 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4295,6 +4295,15 @@ class Router:
             raise ValueError(
                 f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
             )
+        elif model in self.get_model_ids():
+            deployment = self.get_model_info(id=model)
+            if deployment is not None:
+                deployment_model = deployment.get("litellm_params", {}).get("model")
+                return deployment_model, deployment
+            raise ValueError(
+                f"LiteLLM Router: Trying to call specific deployment, but Model ID: {model} does not exist in \
+                    Model ID List: {self.get_model_ids()}"
+            )
 
         if model in self.model_group_alias:
             verbose_router_logger.debug(
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 4cde1b55f..b84bc49d9 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -49,6 +49,27 @@ def test_router_multi_org_list():
     assert len(router.get_model_list()) == 3
 
 
+def test_router_specific_model_via_id():
+    """
+    Call a specific deployment by its id
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "my-fake-key",
+                    "mock_response": "Hello world",
+                },
+                "model_info": {"id": "1234"},
+            }
+        ]
+    )
+
+    router.completion(model="1234", messages=[{"role": "user", "content": "Hey!"}])
+
+
 def test_router_sensitive_keys():
     try:
         router = Router(
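
For quick reference, here is a minimal, self-contained sketch (not part of the patch) of the behavior this change enables: calling a specific deployment by its `model_info.id` instead of its model group name. It mirrors `test_router_specific_model_via_id` above; the API key, the id `"1234"`, and the `mock_response` are placeholder values, and `mock_response` keeps the call local so no provider is contacted.

```python
from litellm import Router

# Register one deployment with an explicit model_info.id ("1234").
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "my-fake-key",        # placeholder key
                "mock_response": "Hello world",  # short-circuits the real provider call
            },
            "model_info": {"id": "1234"},
        }
    ]
)

# With this patch, passing the deployment id as "model" resolves to that
# specific deployment (via Router.get_model_ids() / Router.get_model_info(id=...)).
response = router.completion(
    model="1234",
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)  # -> "Hello world" (mocked)
```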