From 20bf2f50c28f7f22d8c83449dea9a697e16e5fe1 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Fri, 15 Nov 2024 12:20:18 -0800
Subject: [PATCH 1/4] No more model_id warnings

---
 llama_stack/apis/models/models.py         |  4 +++-
 llama_stack/distribution/server/server.py | 14 +++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index a1bfcac00..aabe78d85 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -7,7 +7,7 @@
 from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
 
 from llama_models.schema_utils import json_schema_type, webmethod
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
 
@@ -37,6 +37,8 @@ class ModelInput(CommonModelFields):
     provider_id: Optional[str] = None
     provider_model_id: Optional[str] = None
 
+    model_config = ConfigDict(protected_namespaces=())
+
 
 @runtime_checkable
 class Models(Protocol):
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 5796b6c68..0cfd11eda 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -369,12 +369,16 @@ def main(
 
         impl_method = getattr(impl, endpoint.name)
 
-        getattr(app, endpoint.method)(endpoint.route, response_model=None)(
-            create_dynamic_typed_route(
-                impl_method,
-                endpoint.method,
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", category=UserWarning, module="pydantic._internal._fields"
+            )
+            getattr(app, endpoint.method)(endpoint.route, response_model=None)(
+                create_dynamic_typed_route(
+                    impl_method,
+                    endpoint.method,
+                )
             )
-        )
 
     cprint(f"Serving API {api_str}", "white", attrs=["bold"])
     for endpoint in endpoints:
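What this first patch silences, in isolation: pydantic v2 reserves the `model_` namespace, so any field named `model_id` triggers a `UserWarning` at class-definition time. A minimal standalone sketch of both halves of the fix, assuming pydantic v2; the two classes below are hypothetical stand-ins for `ModelInput` and the models built dynamically during route registration, not code from the repository:

```python
# Minimal sketch, assuming pydantic v2 is installed.
import warnings

from pydantic import BaseModel, ConfigDict


class ModelInputSketch(BaseModel):
    # Without this line, defining the class emits roughly:
    #   UserWarning: Field "model_id" has conflict with protected namespace "model_".
    model_config = ConfigDict(protected_namespaces=())

    model_id: str


# The server.py half of the patch: suppress the warning only around the code
# that defines the offending classes. The warning fires when the class
# statement executes, which is why the definition sits inside the block.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore", category=UserWarning, module="pydantic._internal._fields"
    )

    class DynamicRouteSketch(BaseModel):
        model_id: str  # would warn if defined outside this block
```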
From ff99025875b76119f37c2d90a2fd20ee3782384b Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Fri, 15 Nov 2024 14:21:31 -0800
Subject: [PATCH 2/4] await initialize in faiss (#463)

`FaissIndex.initialize` is a coroutine, so calling it bare from
`__init__` never actually ran it; construction now goes through an
async `create` classmethod that awaits initialization.

tests:

```
torchrun $CONDA_PREFIX/bin/pytest -v -s -m "faiss" llama_stack/providers/tests/memory/test_memory.py
```

Co-authored-by: Dinesh Yeduguru
---
 .../providers/inline/memory/faiss/faiss.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/inline/memory/faiss/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py
index 92235ea89..07c42d389 100644
--- a/llama_stack/providers/inline/memory/faiss/faiss.py
+++ b/llama_stack/providers/inline/memory/faiss/faiss.py
@@ -45,7 +45,12 @@ class FaissIndex(EmbeddingIndex):
         self.chunk_by_index = {}
         self.kvstore = kvstore
         self.bank_id = bank_id
-        self.initialize()
+
+    @classmethod
+    async def create(cls, dimension: int, kvstore=None, bank_id: str = None):
+        instance = cls(dimension, kvstore, bank_id)
+        await instance.initialize()
+        return instance
 
     async def initialize(self) -> None:
         if not self.kvstore:
@@ -132,7 +137,10 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
         for bank_data in stored_banks:
             bank = VectorMemoryBank.model_validate_json(bank_data)
             index = BankWithIndex(
-                bank=bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION, self.kvstore)
+                bank=bank,
+                index=await FaissIndex.create(
+                    ALL_MINILM_L6_V2_DIMENSION, self.kvstore, bank.identifier
+                ),
             )
             self.cache[bank.identifier] = index
 
@@ -158,7 +166,9 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
         # Store in cache
         index = BankWithIndex(
             bank=memory_bank,
-            index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION, self.kvstore),
+            index=await FaissIndex.create(
+                ALL_MINILM_L6_V2_DIMENSION, self.kvstore, memory_bank.identifier
+            ),
         )
         self.cache[memory_bank.identifier] = index

From 57bafd0f8c61dcdff86701aeb2be40ef8175b953 Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Fri, 15 Nov 2024 18:02:48 -0800
Subject: [PATCH 3/4] fix faiss serialize and deserialize of index (#464)

`faiss.serialize_index` returns a NumPy array, which we first need to
write into a buffer before persisting to SQLite. Since the record is
stored as JSON, the buffer contents are base64-encoded. The read path
mirrors this: base64-decode, load the bytes back into a NumPy array,
and pass it to `faiss.deserialize_index`.

tests:

```
torchrun $CONDA_PREFIX/bin/pytest -v -s -m "faiss" llama_stack/providers/tests/memory/test_memory.py
```

Co-authored-by: Dinesh Yeduguru
---
 llama_stack/providers/inline/memory/faiss/faiss.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/inline/memory/faiss/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py
index 07c42d389..95791bc69 100644
--- a/llama_stack/providers/inline/memory/faiss/faiss.py
+++ b/llama_stack/providers/inline/memory/faiss/faiss.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import base64
+import io
 import json
 import logging
 
@@ -67,19 +68,20 @@ class FaissIndex(EmbeddingIndex):
             for k, v in data["chunk_by_index"].items()
         }
 
-        index_bytes = base64.b64decode(data["faiss_index"])
-        self.index = faiss.deserialize_index(index_bytes)
+        buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
+        self.index = faiss.deserialize_index(np.loadtxt(buffer, dtype=np.uint8))
 
     async def _save_index(self):
         if not self.kvstore or not self.bank_id:
             return
 
-        index_bytes = faiss.serialize_index(self.index)
-
+        np_index = faiss.serialize_index(self.index)
+        buffer = io.BytesIO()
+        np.savetxt(buffer, np_index)
         data = {
             "id_by_index": self.id_by_index,
             "chunk_by_index": {k: v.json() for k, v in self.chunk_by_index.items()},
-            "faiss_index": base64.b64encode(index_bytes).decode(),
+            "faiss_index": base64.b64encode(buffer.getvalue()).decode("utf-8"),
         }
 
         index_key = f"faiss_index:v1::{self.bank_id}"
@@ -188,7 +190,7 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
     ) -> None:
         index = self.cache.get(bank_id)
         if index is None:
-            raise ValueError(f"Bank {bank_id} not found")
+            raise ValueError(f"Bank {bank_id} not found. found: {self.cache.keys()}")
 
         await index.insert_documents(documents)
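The base64 detour in PATCH 3/4 exists because `faiss.serialize_index` returns a NumPy `uint8` array, and raw bytes cannot be embedded directly in a JSON document. A standalone sketch of the round trip follows; a plain array stands in for the serialized index so it runs without faiss installed, and the explicit integer `fmt` is a deviation from the patch, since recent NumPy's `loadtxt` can reject `savetxt`'s default float formatting for integer dtypes:

```python
import base64
import io
import json

import numpy as np

# Stand-in for the uint8 array that faiss.serialize_index would return.
np_index = np.frombuffer(b"pretend-serialized-faiss-index", dtype=np.uint8)

# Write path: render the array as text into a buffer, then base64-encode the
# buffer so the result can live inside a JSON document in the kvstore.
buffer = io.BytesIO()
np.savetxt(buffer, np_index, fmt="%d")
payload = json.dumps(
    {"faiss_index": base64.b64encode(buffer.getvalue()).decode("utf-8")}
)

# Read path: base64-decode and parse the text back into a uint8 array; the
# real code then hands that array to faiss.deserialize_index.
data = json.loads(payload)
restored = np.loadtxt(io.BytesIO(base64.b64decode(data["faiss_index"])), dtype=np.uint8)
assert np.array_equal(restored, np_index)
```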
From f1b9578f8d80d395ecc955f77cefdcf19a2542e7 Mon Sep 17 00:00:00 2001
From: Vladimir Ivić
Date: Fri, 15 Nov 2024 23:16:42 -0800
Subject: [PATCH 4/4] Extend shorthand support for the `llama stack run`
 command (#465)

**Summary:**
Extend the shorthand run command so it runs successfully when the config
exists under DISTRIBS_BASE_DIR (i.e. ~/.llama/distributions).

For example, imagine you created a new stack using the `llama stack build`
command and named it "my-awesome-llama-stack".

```
$ llama stack build
> Enter a name for your Llama Stack (e.g. my-local-stack): my-awesome-llama-stack
```

To run the stack you created, you previously had to use the full config path:

```
llama stack run ~/.llama/distributions/llamastack-my-awesome-llama-stack/my-awesome-llama-stack-run.yaml
```

With this change, you can start it using the stack name instead of the full path:

```
llama stack run my-awesome-llama-stack
```

**Test Plan:**
Verify the command fails when the stack doesn't exist:

```
python3 -m llama_stack.cli.llama stack run my-test-stack
```

Output [FAILURE]:

```
usage: llama stack run [-h] [--port PORT] [--disable-ipv6] config
llama stack run: error: File /Users/vladimirivic/.llama/distributions/llamastack-my-test-stack/my-test-stack-run.yaml does not exist. Please run `llama stack build` to generate (and optionally edit) a run.yaml file
```

Create a new stack using `llama stack build`. Name it `my-test-stack`.

Verify the command runs successfully:

```
python3 -m llama_stack.cli.llama stack run my-test-stack
```

Output [SUCCESS]:

```
Listening on ['::', '0.0.0.0']:5000
INFO:     Started server process [80146]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://['::', '0.0.0.0']:5000 (Press CTRL+C to quit)
```

---
 llama_stack/cli/stack/run.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 842703d4c..5fce8c92c 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -48,7 +48,10 @@ class StackRun(Subcommand):
 
         from llama_stack.distribution.build import ImageType
         from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
-        from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
+        from llama_stack.distribution.utils.config_dirs import (
+            BUILDS_BASE_DIR,
+            DISTRIBS_BASE_DIR,
+        )
         from llama_stack.distribution.utils.exec import run_with_pty
 
         if not args.config:
@@ -68,6 +71,14 @@ class StackRun(Subcommand):
                 BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
             )
 
+        if not config_file.exists() and not args.config.endswith(".yaml"):
+            # check if it's a build config saved to ~/.llama dir
+            config_file = Path(
+                DISTRIBS_BASE_DIR
+                / f"llamastack-{args.config}"
+                / f"{args.config}-run.yaml"
+            )
+
         if not config_file.exists():
             self.parser.error(
                 f"File {str(config_file)} does not exist. Please run `llama stack build` to generate (and optionally edit) a run.yaml file"
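The resulting lookup order can be summarized as a small resolver. A sketch under assumptions: `resolve_run_config` is a hypothetical helper, the base-dir values and the conda/docker candidates are inferred from the surrounding code, and only the `DISTRIBS_BASE_DIR` candidate corresponds to the fallback added by this patch:

```python
from pathlib import Path
from typing import Optional

# Assumed values; the real constants live in llama_stack.distribution.utils.config_dirs.
BUILDS_BASE_DIR = Path.home() / ".llama" / "builds"
DISTRIBS_BASE_DIR = Path.home() / ".llama" / "distributions"


def resolve_run_config(config: str) -> Optional[Path]:
    candidates = [
        Path(config),  # a literal path to a run.yaml always wins
        BUILDS_BASE_DIR / "conda" / f"{config}-run.yaml",
        BUILDS_BASE_DIR / "docker" / f"{config}-run.yaml",
        # The new fallback: a stack built and saved under ~/.llama/distributions.
        # (The real code additionally guards this on `not config.endswith(".yaml")`.)
        DISTRIBS_BASE_DIR / f"llamastack-{config}" / f"{config}-run.yaml",
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
```

With this in place, `resolve_run_config("my-awesome-llama-stack")` finds the same file that previously required spelling out the full `~/.llama/distributions/...` path.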