The semantics of an Update on resources are very tricky to reason about, especially for memory banks and models. The best way forward here is for the user to unregister the existing resource and register a new one. We don't have a compelling reason to support update APIs.

Tests:

pytest -v -s llama_stack/providers/tests/memory/test_memory.py -m "chroma" --env CHROMA_HOST=localhost --env CHROMA_PORT=8000

pytest -v -s llama_stack/providers/tests/memory/test_memory.py -m "pgvector" --env PGVECTOR_DB=postgres --env PGVECTOR_USER=postgres --env PGVECTOR_PASSWORD=mysecretpassword --env PGVECTOR_HOST=0.0.0.0

$CONDA_PREFIX/bin/pytest -v -s -m "ollama" llama_stack/providers/tests/inference/test_model_registration.py

---------

Co-authored-by: Dinesh Yeduguru <dineshyv@fb.com>
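Since there is no update API, the way to change an existing model registration is to drop it and register a replacement. Below is a minimal sketch of that pattern using the ModelsClient defined in this file; the base URL, the old identifier, and the new_model object are placeholders, and the Model should be constructed with whatever fields your schema actually requires.

import asyncio

async def replace_model(client: ModelsClient, old_identifier: str, new_model: Model) -> None:
    # No update endpoint exists by design: remove the stale registration,
    # then register the replacement resource.
    await client.unregister_model(old_identifier)
    await client.register_model(new_model)

# Hypothetical usage:
# client = ModelsClient("http://localhost:5000")
# asyncio.run(replace_model(client, "Llama3.1-8B-Instruct", new_model))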
92 lines
2.8 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import json

from typing import List, Optional

import fire
import httpx
from termcolor import cprint

from .models import *  # noqa: F403


class ModelsClient(Models):
    # Client-side implementation of the Models API that forwards each call to a
    # remote Llama Stack server over HTTP, opening a fresh httpx session per call.
    def __init__(self, base_url: str):
        self.base_url = base_url

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        pass

    async def list_models(self) -> List[Model]:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.base_url}/models/list",
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()
            return [Model(**x) for x in response.json()]

    async def register_model(self, model: Model) -> None:
        async with httpx.AsyncClient() as client:
            # The server expects the serialized model under the "model" key.
            response = await client.post(
                f"{self.base_url}/models/register",
                json={
                    "model": json.loads(model.json()),
                },
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()

    async def get_model(self, identifier: str) -> Optional[Model]:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.base_url}/models/get",
                params={
                    "identifier": identifier,
                },
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()
            j = response.json()
            if j is None:
                return None
            return Model(**j)

    async def unregister_model(self, model_id: str) -> None:
        async with httpx.AsyncClient() as client:
            response = await client.delete(
                f"{self.base_url}/models/delete",
                params={"model_id": model_id},
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()


async def run_main(host: str, port: int, stream: bool):
    # Smoke-test the client: list all models, then look up two known identifiers.
    client = ModelsClient(f"http://{host}:{port}")

    response = await client.list_models()
    cprint(f"list_models response={response}", "green")

    response = await client.get_model("Llama3.1-8B-Instruct")
    cprint(f"get_model response={response}", "blue")

    response = await client.get_model("Llama-Guard-3-1B")
    cprint(f"get_model response={response}", "red")


def main(host: str, port: int, stream: bool = True):
    asyncio.run(run_main(host, port, stream))


if __name__ == "__main__":
    fire.Fire(main)
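For a quick manual check, the script can be driven through fire.Fire, e.g. python -m llama_stack.apis.models.client localhost 5000. The module path here is an assumption; the relative .models import means the file has to be run as a module, with host and port passed positionally or as --host/--port.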