Merge pull request #4290 from BerriAI/litellm_specific_deployment

feat(router.py): allow user to call specific deployment via id
2024-06-20 20:36:13 -07:00 · 2024-06-20 20:36:13 -07:00 · f86290584a
commit f86290584a
parent 790d7057cb 27c045fed0
4 changed files with 84 additions and 0 deletions
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@ -162,3 +162,45 @@ Example Response:
 ```json
 "I'm alive!"
 ```
+
+## Advanced - Call specific models 
+
+To check health of specific models, here's how to call them: 
+
+### 1. Get model id via `/model/info` 
+
+```bash
+curl -X GET 'http://0.0.0.0:4000/v1/model/info' \
+--header 'Authorization: Bearer sk-1234'
+```
+
+**Expected Response**
+
+```bash
+{
+    "model_name": "bedrock-anthropic-claude-3",
+    "litellm_params": {
+        "model": "anthropic.claude-3-sonnet-20240229-v1:0"
+    },
+    "model_info": {
+        "id": "634b87c444.." # 👈 UNIQUE MODEL ID
+    }
+}
+```
+
+### 2. Call specific model via `/chat/completions` 
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+  "model": "634b87c444..", # 👈 UNIQUE MODEL ID
+  "messages": [
+    {
+      "role": "user",
+      "content": "ping"
+    }
+  ]
+}
+'
+```
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -3005,6 +3005,10 @@ async def chat_completion(
            llm_router is not None and data["model"] in router_model_names
        ):  # model in router model list
            tasks.append(llm_router.acompletion(**data))
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model is a deployment id, calling a specific deployment on the router
+            tasks.append(llm_router.acompletion(**data))
        elif (
            llm_router is not None
            and llm_router.model_group_alias is not None
@ -3256,6 +3260,10 @@ async def completion(
            llm_response = asyncio.create_task(
                llm_router.atext_completion(**data, specific_deployment=True)
            )
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model is a deployment id, calling a specific deployment on the router
+            llm_response = asyncio.create_task(llm_router.atext_completion(**data))
        elif (
            llm_router is not None
            and data["model"] not in router_model_names
@ -3488,6 +3496,10 @@ async def embeddings(
            llm_router is not None and data["model"] in llm_router.deployment_names
        ):  # model in router deployments, calling a specific deployment on the router
            response = await llm_router.aembedding(**data, specific_deployment=True)
+        elif (
+            llm_router is not None and data["model"] in llm_router.get_model_ids()
+        ):  # model in router deployments, calling a specific deployment on the router
+            response = await llm_router.aembedding(**data)
        elif (
            llm_router is not None
            and data["model"] not in router_model_names
--- a/litellm/router.py
+++ b/litellm/router.py
@ -4295,6 +4295,15 @@ class Router:
            raise ValueError(
                f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
            )
+        elif model in self.get_model_ids():
+            deployment = self.get_model_info(id=model)
+            if deployment is not None:
+                deployment_model = deployment.get("litellm_params", {}).get("model")
+                return deployment_model, deployment
+            raise ValueError(
+                f"LiteLLM Router: Trying to call specific deployment, but Model ID :{model} does not exist in \
+                    Model ID List: {self.get_model_ids}"
+            )

        if model in self.model_group_alias:
            verbose_router_logger.debug(
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@ -49,6 +49,27 @@ def test_router_multi_org_list():
    assert len(router.get_model_list()) == 3


+def test_router_specific_model_via_id():
+    """
+    Call a specific deployment by its id
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "my-fake-key",
+                    "mock_response": "Hello world",
+                },
+                "model_info": {"id": "1234"},
+            }
+        ]
+    )
+
+    router.completion(model="1234", messages=[{"role": "user", "content": "Hey!"}])
+
+
 def test_router_sensitive_keys():
    try:
        router = Router(