[tests] add client-sdk pytests & delete client.py (#638)

# What does this PR do?

**Why**
- Clean up examples that we will not maintain; reduce the surface area to the minimal showcases

**What**
- Delete `client.py` in /apis/*
- Move all scripts to unit tests
- SDK sync in the future will just require running pytests

**Side notes**
- `bwrap` is not available on Mac, so code_interpreter will not work

## Test Plan

```
LLAMA_STACK_BASE_URL=http://localhost:5000 pytest -v ./tests/client-sdk
```

<img width="725" alt="image" src="https://github.com/user-attachments/assets/36bfe537-628d-43c3-8479-dcfcfe2e4035" />

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
2024-12-16 12:04:56 -08:00 · 2024-12-16 12:04:56 -08:00 · 78e2bfbe7a
commit 78e2bfbe7a
parent cb8a28c128
23 changed files with 557 additions and 1514 deletions
--- a/llama_stack/apis/memory/client.py
+++ b/llama_stack/apis/memory/client.py
@ -1,163 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import os
-from pathlib import Path
-
-from typing import Any, Dict, List, Optional
-
-import fire
-import httpx
-
-from llama_stack.distribution.datatypes import RemoteProviderConfig
-
-from llama_stack.apis.memory import *  # noqa: F403
-from llama_stack.apis.memory_banks.client import MemoryBanksClient
-from llama_stack.providers.utils.memory.file_utils import data_url_from_file
-
-
-async def get_client_impl(config: RemoteProviderConfig, _deps: Any) -> Memory:
-    return MemoryClient(config.url)
-
-
-class MemoryClient(Memory):
-    def __init__(self, base_url: str):
-        self.base_url = base_url
-
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        pass
-
-    async def insert_documents(
-        self,
-        bank_id: str,
-        documents: List[MemoryBankDocument],
-    ) -> None:
-        async with httpx.AsyncClient() as client:
-            r = await client.post(
-                f"{self.base_url}/memory/insert",
-                json={
-                    "bank_id": bank_id,
-                    "documents": [d.dict() for d in documents],
-                },
-                headers={"Content-Type": "application/json"},
-                timeout=20,
-            )
-            r.raise_for_status()
-
-    async def query_documents(
-        self,
-        bank_id: str,
-        query: InterleavedTextMedia,
-        params: Optional[Dict[str, Any]] = None,
-    ) -> QueryDocumentsResponse:
-        async with httpx.AsyncClient() as client:
-            r = await client.post(
-                f"{self.base_url}/memory/query",
-                json={
-                    "bank_id": bank_id,
-                    "query": query,
-                    "params": params,
-                },
-                headers={"Content-Type": "application/json"},
-                timeout=20,
-            )
-            r.raise_for_status()
-            return QueryDocumentsResponse(**r.json())
-
-
-async def run_main(host: str, port: int, stream: bool):
-    banks_client = MemoryBanksClient(f"http://{host}:{port}")
-
-    bank = VectorMemoryBank(
-        identifier="test_bank",
-        provider_id="",
-        embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=512,
-        overlap_size_in_tokens=64,
-    )
-    await banks_client.register_memory_bank(
-        bank.identifier,
-        VectorMemoryBankParams(
-            embedding_model="all-MiniLM-L6-v2",
-            chunk_size_in_tokens=512,
-            overlap_size_in_tokens=64,
-        ),
-        provider_resource_id=bank.identifier,
-    )
-
-    retrieved_bank = await banks_client.get_memory_bank(bank.identifier)
-    assert retrieved_bank is not None
-    assert retrieved_bank.embedding_model == "all-MiniLM-L6-v2"
-
-    urls = [
-        "memory_optimizations.rst",
-        "chat.rst",
-        "llama3.rst",
-        "datasets.rst",
-        "qat_finetune.rst",
-        "lora_finetune.rst",
-    ]
-    documents = [
-        MemoryBankDocument(
-            document_id=f"num-{i}",
-            content=URL(
-                uri=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}"
-            ),
-            mime_type="text/plain",
-        )
-        for i, url in enumerate(urls)
-    ]
-
-    this_dir = os.path.dirname(__file__)
-    files = [Path(this_dir).parent.parent.parent / "CONTRIBUTING.md"]
-    documents += [
-        MemoryBankDocument(
-            document_id=f"num-{i}",
-            content=data_url_from_file(path),
-        )
-        for i, path in enumerate(files)
-    ]
-
-    client = MemoryClient(f"http://{host}:{port}")
-
-    # insert some documents
-    await client.insert_documents(
-        bank_id=bank.identifier,
-        documents=documents,
-    )
-
-    # query the documents
-    response = await client.query_documents(
-        bank_id=bank.identifier,
-        query=[
-            "How do I use Lora?",
-        ],
-    )
-    for chunk, score in zip(response.chunks, response.scores):
-        print(f"Score: {score}")
-        print(f"Chunk:\n========\n{chunk}\n========\n")
-
-    response = await client.query_documents(
-        bank_id=bank.identifier,
-        query=[
-            "Tell me more about llama3 and torchtune",
-        ],
-    )
-    for chunk, score in zip(response.chunks, response.scores):
-        print(f"Score: {score}")
-        print(f"Chunk:\n========\n{chunk}\n========\n")
-
-
-def main(host: str, port: int, stream: bool = True):
-    asyncio.run(run_main(host, port, stream))
-
-
-if __name__ == "__main__":
-    fire.Fire(main)