Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 20:14:13 +00:00)
Further generalize Xi's changes (#88)
* Further generalize Xi's changes
  - introduce a slightly more general notion of an AutoRouted provider
  - the AutoRouted provider is associated with a RoutingTable provider
    - e.g. inference -> models
    - introduced the safety -> shields and memory -> memory_banks correspondences
* typo
* Basic build and run succeeded
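A minimal sketch of the correspondence described in the commit message, assuming nothing beyond the message itself; the names below are illustrative placeholders, not the actual llama-stack provider datatypes:

# Illustrative only: each auto-routed API is backed by a routing-table API,
# per the correspondences named in the commit message.
AUTO_ROUTED_TO_ROUTING_TABLE = {
    "inference": "models",       # inference requests are routed via the models table
    "safety": "shields",         # safety requests are routed via the shields table
    "memory": "memory_banks",    # memory requests are routed via the memory_banks table
}


def routing_table_for(auto_routed_api: str) -> str:
    # Look up the routing-table API that resolves providers for an auto-routed API.
    return AUTO_ROUTED_TO_ROUTING_TABLE[auto_routed_api]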
parent b8914bb56f
commit c1ab66f1e6

21 changed files with 597 additions and 418 deletions
Diff excerpt (the ModelsClient changes; one of the 21 changed files):

@@ -5,15 +5,11 @@
 # the root directory of this source tree.
 
 import asyncio
-import json
-from pathlib import Path
-
-from typing import Any, Dict, List, Optional
+from typing import List, Optional
 
 import fire
 import httpx
 
-from llama_stack.distribution.datatypes import RemoteProviderConfig
 from termcolor import cprint
 
 from .models import *  # noqa: F403

@@ -29,18 +25,18 @@ class ModelsClient(Models):
     async def shutdown(self) -> None:
         pass
 
-    async def list_models(self) -> ModelsListResponse:
+    async def list_models(self) -> List[ModelServingSpec]:
         async with httpx.AsyncClient() as client:
             response = await client.get(
                 f"{self.base_url}/models/list",
                 headers={"Content-Type": "application/json"},
             )
             response.raise_for_status()
-            return ModelsListResponse(**response.json())
+            return [ModelServingSpec(**x) for x in response.json()]
 
-    async def get_model(self, core_model_id: str) -> ModelsGetResponse:
+    async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]:
         async with httpx.AsyncClient() as client:
-            response = await client.post(
+            response = await client.get(
                 f"{self.base_url}/models/get",
                 json={
                     "core_model_id": core_model_id,

@@ -48,7 +44,10 @@ class ModelsClient(Models):
                 headers={"Content-Type": "application/json"},
             )
             response.raise_for_status()
-            return ModelsGetResponse(**response.json())
+            j = response.json()
+            if j is None:
+                return None
+            return ModelServingSpec(**j)
 
 
 async def run_main(host: str, port: int, stream: bool):