From 20bf2f50c28f7f22d8c83449dea9a697e16e5fe1 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Fri, 15 Nov 2024 12:20:18 -0800
Subject: [PATCH 1/4] No more model_id warnings

---
 llama_stack/apis/models/models.py         |  4 +++-
 llama_stack/distribution/server/server.py | 14 +++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index a1bfcac00..aabe78d85 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -7,7 +7,7 @@
 from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
 
 from llama_models.schema_utils import json_schema_type, webmethod
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
 
@@ -37,6 +37,8 @@ class ModelInput(CommonModelFields):
     provider_id: Optional[str] = None
     provider_model_id: Optional[str] = None
 
+    model_config = ConfigDict(protected_namespaces=())
+
 
 @runtime_checkable
 class Models(Protocol):
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 5796b6c68..0cfd11eda 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -369,12 +369,16 @@ def main(
 
         impl_method = getattr(impl, endpoint.name)
 
-        getattr(app, endpoint.method)(endpoint.route, response_model=None)(
-            create_dynamic_typed_route(
-                impl_method,
-                endpoint.method,
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", category=UserWarning, module="pydantic._internal._fields"
+            )
+            getattr(app, endpoint.method)(endpoint.route, response_model=None)(
+                create_dynamic_typed_route(
+                    impl_method,
+                    endpoint.method,
+                )
             )
-        )
 
     cprint(f"Serving API {api_str}", "white", attrs=["bold"])
     for endpoint in endpoints:
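What this first patch silences, in isolation: pydantic v2 reserves the `model_` namespace, so any field named `model_id` triggers a `UserWarning` at class-definition time. A minimal standalone sketch of both halves of the fix, assuming pydantic v2; the two classes below are hypothetical stand-ins for `ModelInput` and the models built dynamically during route registration, not code from the repository:

```python
# Minimal sketch, assuming pydantic v2 is installed.
import warnings

from pydantic import BaseModel, ConfigDict


class ModelInputSketch(BaseModel):
    # Without this line, defining the class emits roughly:
    #   UserWarning: Field "model_id" has conflict with protected namespace "model_".
    model_config = ConfigDict(protected_namespaces=())

    model_id: str


# The server.py half of the patch: suppress the warning only around the code
# that defines the offending classes. The warning fires when the class
# statement executes, which is why the definition sits inside the block.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore", category=UserWarning, module="pydantic._internal._fields"
    )

    class DynamicRouteSketch(BaseModel):
        model_id: str  # would warn if defined outside this block
```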
From ff99025875b76119f37c2d90a2fd20ee3782384b Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Fri, 15 Nov 2024 14:21:31 -0800
Subject: [PATCH 2/4] await initialize in faiss (#463)

`FaissIndex.initialize` is a coroutine, so calling it bare from
`__init__` never actually ran it; construction now goes through an
async `create` classmethod that awaits initialization.

tests:

```
torchrun $CONDA_PREFIX/bin/pytest -v -s -m "faiss" llama_stack/providers/tests/memory/test_memory.py
```

Co-authored-by: Dinesh Yeduguru
---
 .../providers/inline/memory/faiss/faiss.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/inline/memory/faiss/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py
index 92235ea89..07c42d389 100644
--- a/llama_stack/providers/inline/memory/faiss/faiss.py
+++ b/llama_stack/providers/inline/memory/faiss/faiss.py
@@ -45,7 +45,12 @@ class FaissIndex(EmbeddingIndex):
         self.chunk_by_index = {}
         self.kvstore = kvstore
         self.bank_id = bank_id
-        self.initialize()
+
+    @classmethod
+    async def create(cls, dimension: int, kvstore=None, bank_id: str = None):
+        instance = cls(dimension, kvstore, bank_id)
+        await instance.initialize()
+        return instance
 
     async def initialize(self) -> None:
         if not self.kvstore:
@@ -132,7 +137,10 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
         for bank_data in stored_banks:
             bank = VectorMemoryBank.model_validate_json(bank_data)
             index = BankWithIndex(
-                bank=bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION, self.kvstore)
+                bank=bank,
+                index=await FaissIndex.create(
+                    ALL_MINILM_L6_V2_DIMENSION, self.kvstore, bank.identifier
+                ),
             )
             self.cache[bank.identifier] = index
 
@@ -158,7 +166,9 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
         # Store in cache
         index = BankWithIndex(
             bank=memory_bank,
-            index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION, self.kvstore),
+            index=await FaissIndex.create(
+                ALL_MINILM_L6_V2_DIMENSION, self.kvstore, memory_bank.identifier
+            ),
         )
         self.cache[memory_bank.identifier] = index

From 57bafd0f8c61dcdff86701aeb2be40ef8175b953 Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Fri, 15 Nov 2024 18:02:48 -0800
Subject: [PATCH 3/4] fix faiss serialize and deserialize of index (#464)

`faiss.serialize_index` returns a NumPy array, which we first need to
write into a buffer before persisting to SQLite. Since the record is
stored as JSON, the buffer contents are base64-encoded. The read path
mirrors this: base64-decode, load the bytes back into a NumPy array,
and pass it to `faiss.deserialize_index`.

tests:

```
torchrun $CONDA_PREFIX/bin/pytest -v -s -m "faiss" llama_stack/providers/tests/memory/test_memory.py
```

Co-authored-by: Dinesh Yeduguru
---
 llama_stack/providers/inline/memory/faiss/faiss.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/inline/memory/faiss/faiss.py b/llama_stack/providers/inline/memory/faiss/faiss.py
index 07c42d389..95791bc69 100644
--- a/llama_stack/providers/inline/memory/faiss/faiss.py
+++ b/llama_stack/providers/inline/memory/faiss/faiss.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import base64
+import io
 import json
 import logging
 
@@ -67,19 +68,20 @@ class FaissIndex(EmbeddingIndex):
             for k, v in data["chunk_by_index"].items()
         }
 
-        index_bytes = base64.b64decode(data["faiss_index"])
-        self.index = faiss.deserialize_index(index_bytes)
+        buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
+        self.index = faiss.deserialize_index(np.loadtxt(buffer, dtype=np.uint8))
 
     async def _save_index(self):
         if not self.kvstore or not self.bank_id:
             return
 
-        index_bytes = faiss.serialize_index(self.index)
-
+        np_index = faiss.serialize_index(self.index)
+        buffer = io.BytesIO()
+        np.savetxt(buffer, np_index)
         data = {
             "id_by_index": self.id_by_index,
             "chunk_by_index": {k: v.json() for k, v in self.chunk_by_index.items()},
-            "faiss_index": base64.b64encode(index_bytes).decode(),
+            "faiss_index": base64.b64encode(buffer.getvalue()).decode("utf-8"),
         }
 
         index_key = f"faiss_index:v1::{self.bank_id}"
@@ -188,7 +190,7 @@ class FaissMemoryImpl(Memory, MemoryBanksProtocolPrivate):
     ) -> None:
         index = self.cache.get(bank_id)
         if index is None:
-            raise ValueError(f"Bank {bank_id} not found")
+            raise ValueError(f"Bank {bank_id} not found. found: {self.cache.keys()}")
 
         await index.insert_documents(documents)
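The base64 detour in PATCH 3/4 exists because `faiss.serialize_index` returns a NumPy `uint8` array, and raw bytes cannot be embedded directly in a JSON document. A standalone sketch of the round trip follows; a plain array stands in for the serialized index so it runs without faiss installed, and the explicit integer `fmt` is a deviation from the patch, since recent NumPy's `loadtxt` can reject `savetxt`'s default float formatting for integer dtypes:

```python
import base64
import io
import json

import numpy as np

# Stand-in for the uint8 array that faiss.serialize_index would return.
np_index = np.frombuffer(b"pretend-serialized-faiss-index", dtype=np.uint8)

# Write path: render the array as text into a buffer, then base64-encode the
# buffer so the result can live inside a JSON document in the kvstore.
buffer = io.BytesIO()
np.savetxt(buffer, np_index, fmt="%d")
payload = json.dumps(
    {"faiss_index": base64.b64encode(buffer.getvalue()).decode("utf-8")}
)

# Read path: base64-decode and parse the text back into a uint8 array; the
# real code then hands that array to faiss.deserialize_index.
data = json.loads(payload)
restored = np.loadtxt(io.BytesIO(base64.b64decode(data["faiss_index"])), dtype=np.uint8)
assert np.array_equal(restored, np_index)
```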
From f1b9578f8d80d395ecc955f77cefdcf19a2542e7 Mon Sep 17 00:00:00 2001
From: Vladimir Ivić
Date: Fri, 15 Nov 2024 23:16:42 -0800
Subject: [PATCH 4/4] Extend shorthand support for the `llama stack run`
 command (#465)

**Summary:**
Extend the shorthand run command so it runs successfully when the config
exists under DISTRIBS_BASE_DIR (i.e. ~/.llama/distributions).

For example, imagine you created a new stack using the `llama stack build`
command and named it "my-awesome-llama-stack".

```
$ llama stack build
> Enter a name for your Llama Stack (e.g. my-local-stack): my-awesome-llama-stack
```

To run the stack you created, you previously had to use the full config path:

```
llama stack run ~/.llama/distributions/llamastack-my-awesome-llama-stack/my-awesome-llama-stack-run.yaml
```

With this change, you can start it using the stack name instead of the full path:

```
llama stack run my-awesome-llama-stack
```

**Test Plan:**
Verify the command fails when the stack doesn't exist:

```
python3 -m llama_stack.cli.llama stack run my-test-stack
```

Output [FAILURE]:

```
usage: llama stack run [-h] [--port PORT] [--disable-ipv6] config
llama stack run: error: File /Users/vladimirivic/.llama/distributions/llamastack-my-test-stack/my-test-stack-run.yaml does not exist. Please run `llama stack build` to generate (and optionally edit) a run.yaml file
```

Create a new stack using `llama stack build`. Name it `my-test-stack`.

Verify the command runs successfully:

```
python3 -m llama_stack.cli.llama stack run my-test-stack
```

Output [SUCCESS]:

```
Listening on ['::', '0.0.0.0']:5000
INFO:     Started server process [80146]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://['::', '0.0.0.0']:5000 (Press CTRL+C to quit)
```

---
 llama_stack/cli/stack/run.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 842703d4c..5fce8c92c 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -48,7 +48,10 @@ class StackRun(Subcommand):
 
         from llama_stack.distribution.build import ImageType
         from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
-        from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
+        from llama_stack.distribution.utils.config_dirs import (
+            BUILDS_BASE_DIR,
+            DISTRIBS_BASE_DIR,
+        )
         from llama_stack.distribution.utils.exec import run_with_pty
 
         if not args.config:
@@ -68,6 +71,14 @@ class StackRun(Subcommand):
                 BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
             )
 
+        if not config_file.exists() and not args.config.endswith(".yaml"):
+            # check if it's a build config saved to ~/.llama dir
+            config_file = Path(
+                DISTRIBS_BASE_DIR
+                / f"llamastack-{args.config}"
+                / f"{args.config}-run.yaml"
+            )
+
         if not config_file.exists():
             self.parser.error(
                 f"File {str(config_file)} does not exist. Please run `llama stack build` to generate (and optionally edit) a run.yaml file"
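The resulting lookup order can be summarized as a small resolver. A sketch under assumptions: `resolve_run_config` is a hypothetical helper, the base-dir values and the conda/docker candidates are inferred from the surrounding code, and only the `DISTRIBS_BASE_DIR` candidate corresponds to the fallback added by this patch:

```python
from pathlib import Path
from typing import Optional

# Assumed values; the real constants live in llama_stack.distribution.utils.config_dirs.
BUILDS_BASE_DIR = Path.home() / ".llama" / "builds"
DISTRIBS_BASE_DIR = Path.home() / ".llama" / "distributions"


def resolve_run_config(config: str) -> Optional[Path]:
    candidates = [
        Path(config),  # a literal path to a run.yaml always wins
        BUILDS_BASE_DIR / "conda" / f"{config}-run.yaml",
        BUILDS_BASE_DIR / "docker" / f"{config}-run.yaml",
        # The new fallback: a stack built and saved under ~/.llama/distributions.
        # (The real code additionally guards this on `not config.endswith(".yaml")`.)
        DISTRIBS_BASE_DIR / f"llamastack-{config}" / f"{config}-run.yaml",
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
```

With this in place, `resolve_run_config("my-awesome-llama-stack")` finds the same file that previously required spelling out the full `~/.llama/distributions/...` path.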