forked from phoenix/litellm-mirror
Merge pull request #4290 from BerriAI/litellm_specific_deployment
feat(router.py): allow user to call specific deployment via id
This commit is contained in:
commit
f86290584a
4 changed files with 84 additions and 0 deletions
|
@ -161,4 +161,46 @@ Example Response:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"I'm alive!"
|
"I'm alive!"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advanced - Call specific models
|
||||||
|
|
||||||
|
To check the health of a specific model, call it directly by its model id:
|
||||||
|
|
||||||
|
### 1. Get model id via `/model/info`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X GET 'http://0.0.0.0:4000/v1/model/info' \
|
||||||
|
--header 'Authorization: Bearer sk-1234'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
"model_name": "bedrock-anthropic-claude-3",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "anthropic.claude-3-sonnet-20240229-v1:0"
|
||||||
|
},
|
||||||
|
"model_info": {
|
||||||
|
"id": "634b87c444..", # 👈 UNIQUE MODEL ID
|
||||||
|
}
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Call specific model via `/chat/completions`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://localhost:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "634b87c444..", # 👈 UNIQUE MODEL ID
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "ping"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
'
|
||||||
```
|
```
|
|
@ -3005,6 +3005,10 @@ async def chat_completion(
|
||||||
llm_router is not None and data["model"] in router_model_names
|
llm_router is not None and data["model"] in router_model_names
|
||||||
): # model in router model list
|
): # model in router model list
|
||||||
tasks.append(llm_router.acompletion(**data))
|
tasks.append(llm_router.acompletion(**data))
|
||||||
|
elif (
|
||||||
|
llm_router is not None and data["model"] in llm_router.get_model_ids()
|
||||||
|
): # model is a deployment id on the router, calling that specific deployment
|
||||||
|
tasks.append(llm_router.acompletion(**data))
|
||||||
elif (
|
elif (
|
||||||
llm_router is not None
|
llm_router is not None
|
||||||
and llm_router.model_group_alias is not None
|
and llm_router.model_group_alias is not None
|
||||||
|
@ -3256,6 +3260,10 @@ async def completion(
|
||||||
llm_response = asyncio.create_task(
|
llm_response = asyncio.create_task(
|
||||||
llm_router.atext_completion(**data, specific_deployment=True)
|
llm_router.atext_completion(**data, specific_deployment=True)
|
||||||
)
|
)
|
||||||
|
elif (
|
||||||
|
llm_router is not None and data["model"] in llm_router.get_model_ids()
|
||||||
|
): # model is a deployment id on the router, calling that specific deployment
|
||||||
|
llm_response = asyncio.create_task(llm_router.atext_completion(**data))
|
||||||
elif (
|
elif (
|
||||||
llm_router is not None
|
llm_router is not None
|
||||||
and data["model"] not in router_model_names
|
and data["model"] not in router_model_names
|
||||||
|
@ -3488,6 +3496,10 @@ async def embeddings(
|
||||||
llm_router is not None and data["model"] in llm_router.deployment_names
|
llm_router is not None and data["model"] in llm_router.deployment_names
|
||||||
): # model in router deployments, calling a specific deployment on the router
|
): # model in router deployments, calling a specific deployment on the router
|
||||||
response = await llm_router.aembedding(**data, specific_deployment=True)
|
response = await llm_router.aembedding(**data, specific_deployment=True)
|
||||||
|
elif (
|
||||||
|
llm_router is not None and data["model"] in llm_router.get_model_ids()
|
||||||
|
): # model in router deployments, calling a specific deployment on the router
|
||||||
|
response = await llm_router.aembedding(**data)
|
||||||
elif (
|
elif (
|
||||||
llm_router is not None
|
llm_router is not None
|
||||||
and data["model"] not in router_model_names
|
and data["model"] not in router_model_names
|
||||||
|
|
|
@ -4295,6 +4295,15 @@ class Router:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
|
f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
|
||||||
)
|
)
|
||||||
|
elif model in self.get_model_ids():
|
||||||
|
deployment = self.get_model_info(id=model)
|
||||||
|
if deployment is not None:
|
||||||
|
deployment_model = deployment.get("litellm_params", {}).get("model")
|
||||||
|
return deployment_model, deployment
|
||||||
|
raise ValueError(
|
||||||
|
f"LiteLLM Router: Trying to call specific deployment, but Model ID :{model} does not exist in \
|
||||||
|
Model ID List: {self.get_model_ids}"
|
||||||
|
)
|
||||||
|
|
||||||
if model in self.model_group_alias:
|
if model in self.model_group_alias:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
|
|
|
@ -49,6 +49,27 @@ def test_router_multi_org_list():
|
||||||
assert len(router.get_model_list()) == 3
|
assert len(router.get_model_list()) == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_router_specific_model_via_id():
|
||||||
|
"""
|
||||||
|
Call a specific deployment by its id
|
||||||
|
"""
|
||||||
|
router = Router(
|
||||||
|
model_list=[
|
||||||
|
{
|
||||||
|
"model_name": "gpt-3.5-turbo",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"api_key": "my-fake-key",
|
||||||
|
"mock_response": "Hello world",
|
||||||
|
},
|
||||||
|
"model_info": {"id": "1234"},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
router.completion(model="1234", messages=[{"role": "user", "content": "Hey!"}])
|
||||||
|
|
||||||
|
|
||||||
def test_router_sensitive_keys():
|
def test_router_sensitive_keys():
|
||||||
try:
|
try:
|
||||||
router = Router(
|
router = Router(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue