skeleton models api

Xi Yan 2024-09-19 16:26:24 -07:00
parent 59af1c8fec
commit 68131afc86
9 changed files with 233 additions and 10 deletions


@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import asyncio
+
+import fire
+import httpx
+from termcolor import cprint
+
+from .models import *  # noqa: F403
+
+
+class ModelsClient(Models):
+    def __init__(self, base_url: str):
+        self.base_url = base_url
+
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def list_models(self) -> ModelsListResponse:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{self.base_url}/models/list",
+                headers={"Content-Type": "application/json"},
+            )
+            response.raise_for_status()
+            return ModelsListResponse(**response.json())
+
+    async def get_model(self, core_model_id: str) -> ModelsGetResponse:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{self.base_url}/models/get",
+                json={
+                    "core_model_id": core_model_id,
+                },
+                headers={"Content-Type": "application/json"},
+            )
+            response.raise_for_status()
+            return ModelsGetResponse(**response.json())
+
+
+async def run_main(host: str, port: int):
+    client = ModelsClient(f"http://{host}:{port}")
+
+    response = await client.list_models()
+    cprint(f"list_models response={response}", "green")
+
+    response = await client.get_model("Meta-Llama3.1-8B-Instruct")
+    cprint(f"get_model response={response}", "blue")
+
+    response = await client.get_model("Llama-Guard-3-8B")
+    cprint(f"get_model response={response}", "red")
+
+
+def main(host: str, port: int):
+    asyncio.run(run_main(host, port))
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
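
For reference, the client file above doubles as a small fire-based CLI. Assuming it lives at llama_stack/apis/models/client.py (the commit view does not show file paths, so the module path is a guess) and a stack server is listening on localhost:5000, it could be exercised with:

python -m llama_stack.apis.models.client localhost 5000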


@@ -1,14 +1,51 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Protocol
+from typing import Any, Dict, List, Optional, Protocol
 
-from llama_models.schema_utils import webmethod  # noqa: F401
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from pydantic import BaseModel  # noqa: F401
+from llama_models.schema_utils import json_schema_type, webmethod
+from pydantic import BaseModel, Field
 
 
-class Models(Protocol): ...
+@json_schema_type
+class ModelSpec(BaseModel):
+    llama_model_metadata: Model = Field(
+        description="All metadata associated with the llama model (defined in llama_models.models.sku_list)."
+    )
+    providers_spec: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Map of API to the concrete provider specs. E.g. {}".format(
+            {
+                "inference": {
+                    "provider_type": "remote::8080",
+                    "url": "localhost::5555",
+                    "api_token": "hf_xxx",
+                },
+            }
+        ),
+    )
+
+
+@json_schema_type
+class ModelsListResponse(BaseModel):
+    models_list: List[ModelSpec]
+
+
+@json_schema_type
+class ModelsGetResponse(BaseModel):
+    core_model_spec: Optional[ModelSpec] = None
+
+
+@json_schema_type
+class ModelsRegisterResponse(BaseModel):
+    core_model_spec: Optional[ModelSpec] = None
+
+
+class Models(Protocol):
+    @webmethod(route="/models/list", method="GET")
+    async def list_models(self) -> ModelsListResponse: ...
+
+    @webmethod(route="/models/get", method="POST")
+    async def get_model(self, core_model_id: str) -> ModelsGetResponse: ...
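
On the implementation side, any class that fulfills this Protocol can back the two routes. A minimal in-memory sketch (not part of this commit; it assumes llama_models.sku_list.resolve_model, which maps a descriptor such as "Llama-Guard-3-8B" to a Model, plus the response types defined above):

from typing import Dict, List

from llama_models.sku_list import resolve_model


class InMemoryModels(Models):
    """Illustrative provider serving a fixed set of descriptors from the static SKU list."""

    def __init__(self, served_model_ids: List[str]):
        self.specs: Dict[str, ModelSpec] = {}
        for model_id in served_model_ids:
            model = resolve_model(model_id)
            if model is not None:  # skip descriptors the SKU list does not know
                self.specs[model_id] = ModelSpec(llama_model_metadata=model)

    async def list_models(self) -> ModelsListResponse:
        return ModelsListResponse(models_list=list(self.specs.values()))

    async def get_model(self, core_model_id: str) -> ModelsGetResponse:
        # An unknown id yields an empty response; core_model_spec stays None.
        return ModelsGetResponse(core_model_spec=self.specs.get(core_model_id))

With a skeleton like this behind the server, the client's get_model("Llama-Guard-3-8B") call above round-trips to a ModelsGetResponse whose core_model_spec carries the SKU metadata.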