llama-stack-mirror/llama_stack/apis/models/client.py
Dinesh Yeduguru efe791bab7
Support model resource updates and deletes (#452)
# What does this PR do?
* Changes the registry to store only one RoutableObject per identifier.
Before it was a list, which is not really required.
* Adds impl for updates and deletes
* Updates routing table to handle updates correctly



## Test Plan
```
❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
❯ llama-stack-client models register dineshyv-model --provider-model-id=fireworks/llama-v3p1-70b-instruct
Successfully registered model dineshyv-model
❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| dineshyv-model         | fireworks-0   | fireworks/llama-v3p1-70b-instruct  | {}         |
+------------------------+---------------+------------------------------------+------------+
❯ llama-stack-client models update dineshyv-model --provider-model-id=fireworks/llama-v3p1-405b-instruct
Successfully updated model dineshyv-model
❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| dineshyv-model         | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
llama-stack-client models delete dineshyv-model
❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+

```

---------

Co-authored-by: Dinesh Yeduguru <dineshyv@fb.com>
2024-11-13 21:55:41 -08:00

113 lines
3.5 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
import json
from typing import Any, Dict, List, Optional
import fire
import httpx
from termcolor import cprint
from .models import * # noqa: F403
class ModelsClient(Models):
def __init__(self, base_url: str):
self.base_url = base_url
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
pass
async def list_models(self) -> List[Model]:
async with httpx.AsyncClient() as client:
response = await client.get(
f"{self.base_url}/models/list",
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
return [Model(**x) for x in response.json()]
async def register_model(self, model: Model) -> None:
async with httpx.AsyncClient() as client:
response = await client.post(
f"{self.base_url}/models/register",
json={
"model": json.loads(model.json()),
},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
async def get_model(self, identifier: str) -> Optional[Model]:
async with httpx.AsyncClient() as client:
response = await client.get(
f"{self.base_url}/models/get",
params={
"identifier": identifier,
},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
j = response.json()
if j is None:
return None
return Model(**j)
async def update_model(
self,
model_id: str,
provider_model_id: Optional[str] = None,
provider_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> Model:
async with httpx.AsyncClient() as client:
response = await client.put(
f"{self.base_url}/models/update",
json={
"model_id": model_id,
"provider_model_id": provider_model_id,
"provider_id": provider_id,
"metadata": metadata,
},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
return Model(**response.json())
async def delete_model(self, model_id: str) -> None:
async with httpx.AsyncClient() as client:
response = await client.delete(
f"{self.base_url}/models/delete",
params={"model_id": model_id},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
async def run_main(host: str, port: int, stream: bool):
client = ModelsClient(f"http://{host}:{port}")
response = await client.list_models()
cprint(f"list_models response={response}", "green")
response = await client.get_model("Llama3.1-8B-Instruct")
cprint(f"get_model response={response}", "blue")
response = await client.get_model("Llama-Guard-3-1B")
cprint(f"get_model response={response}", "red")
def main(host: str, port: int, stream: bool = True):
asyncio.run(run_main(host, port, stream))
if __name__ == "__main__":
fire.Fire(main)