Further generalize Xi's changes

- Introduce a slightly more general notion of an AutoRouted provider.
- Each AutoRouted provider is associated with a RoutingTable provider (e.g. inference -> models).
- Introduce the safety -> shields and memory -> memory_banks correspondences.
2025-10-05 04:17:32 +00:00 · 2024-09-22 12:06:43 -07:00 · 2024-09-22 12:06:43 -07:00 · e1966b90d9
commit e1966b90d9
parent b8914bb56f
19 changed files with 559 additions and 388 deletions
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@ -4,14 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Any, Dict, List, Optional, Protocol
+from typing import List, Optional, Protocol

 from llama_models.llama3.api.datatypes import Model

 from llama_models.schema_utils import json_schema_type, webmethod
-from llama_stack.distribution.datatypes import GenericProviderConfig
 from pydantic import BaseModel, Field

+from llama_stack.distribution.datatypes import GenericProviderConfig
+

@json_schema_type
 class ModelServingSpec(BaseModel):
@ -21,25 +22,11 @@ class ModelServingSpec(BaseModel):
    provider_config: GenericProviderConfig = Field(
        description="Provider config for the model, including provider_id, and corresponding config. ",
    )
-    api: str = Field(
-        description="The API that this model is serving (e.g. inference / safety).",
-        default="inference",
-    )
-
-
-@json_schema_type
-class ModelsListResponse(BaseModel):
-    models_list: List[ModelServingSpec]
-
-
-@json_schema_type
-class ModelsGetResponse(BaseModel):
-    core_model_spec: Optional[ModelServingSpec] = None


 class Models(Protocol):
    @webmethod(route="/models/list", method="GET")
-    async def list_models(self) -> ModelsListResponse: ...
+    async def list_models(self) -> List[ModelServingSpec]: ...

-    @webmethod(route="/models/get", method="POST")
-    async def get_model(self, core_model_id: str) -> ModelsGetResponse: ...
+    @webmethod(route="/models/get", method="GET")
+    async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]: ...