[tests] add client-sdk pytests & delete client.py (#638)

# What does this PR do?

**Why**
- Clean up example scripts that we will not maintain; reduce the surface
area to a minimal set of showcases

**What**
- Delete `client.py` in `/apis/*`
- Move all the scripts to unit tests
  - Syncing the SDK in the future will only require running the pytests (a
minimal sketch of one such test follows below)
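
As a rough illustration, one of these client-sdk tests could look like the sketch below. The fixture, the env-var default, and the assertion are illustrative assumptions, not the exact tests in this PR; the `llama_stack_client` calls follow the SDK's public interface.

```python
# Minimal sketch of a client-sdk pytest (illustrative): point the SDK at a
# running stack and check that the models endpoint responds.
import os

import pytest
from llama_stack_client import LlamaStackClient


@pytest.fixture
def client() -> LlamaStackClient:
    # Same env var the test plan below uses; the default mirrors a local stack.
    base_url = os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:5000")
    return LlamaStackClient(base_url=base_url)


def test_list_models(client: LlamaStackClient):
    models = client.models.list()
    # A configured stack should expose at least one registered model.
    assert len(models) > 0
```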

**Side notes**
- `bwrap` is not available on macOS, so the `code_interpreter` tests will not work there
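
A hedged sketch of how the suite could guard those tests; the `shutil.which` check, marker name, and test body are illustrative assumptions:

```python
# Skip code_interpreter tests when bwrap is unavailable (e.g. on macOS).
import shutil

import pytest

requires_bwrap = pytest.mark.skipif(
    shutil.which("bwrap") is None,
    reason="bwrap not available (e.g. on macOS); code_interpreter cannot sandbox",
)


@requires_bwrap
def test_code_interpreter():
    ...  # exercise the code_interpreter tool against the running stack
```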

## Test Plan

```
LLAMA_STACK_BASE_URL=http://localhost:5000 pytest -v ./tests/client-sdk
```
<img width="725" alt="image"
src="https://github.com/user-attachments/assets/36bfe537-628d-43c3-8479-dcfcfe2e4035"
/>




## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
Commit 78e2bfbe7a (parent cb8a28c128) · Xi Yan · 2024-12-16 12:04:56 -08:00
23 changed files with 557 additions and 1514 deletions

Deleted file (92 lines): the models API `client.py`, shown with its original structure restored:

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import json
from typing import List, Optional

import fire
import httpx
from termcolor import cprint

from .models import *  # noqa: F403


class ModelsClient(Models):
    def __init__(self, base_url: str):
        self.base_url = base_url

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        pass

    async def list_models(self) -> List[Model]:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.base_url}/models/list",
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()
            return [Model(**x) for x in response.json()]

    async def register_model(self, model: Model) -> None:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.base_url}/models/register",
                json={
                    "model": json.loads(model.model_dump_json()),
                },
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()

    async def get_model(self, identifier: str) -> Optional[Model]:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.base_url}/models/get",
                params={
                    "identifier": identifier,
                },
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()
            j = response.json()
            if j is None:
                return None
            return Model(**j)

    async def unregister_model(self, model_id: str) -> None:
        async with httpx.AsyncClient() as client:
            response = await client.delete(
                f"{self.base_url}/models/delete",
                params={"model_id": model_id},
                headers={"Content-Type": "application/json"},
            )
            response.raise_for_status()


async def run_main(host: str, port: int, stream: bool):
    client = ModelsClient(f"http://{host}:{port}")

    response = await client.list_models()
    cprint(f"list_models response={response}", "green")

    response = await client.get_model("Llama3.1-8B-Instruct")
    cprint(f"get_model response={response}", "blue")

    response = await client.get_model("Llama-Guard-3-1B")
    cprint(f"get_model response={response}", "red")


def main(host: str, port: int, stream: bool = True):
    asyncio.run(run_main(host, port, stream))


if __name__ == "__main__":
    fire.Fire(main)
```