fix: Update rag examples to use fresh faiss index every time (#998)

# What does this PR do?
In several examples we use the same faiss index, which means running
them multiple times fills up the index with duplicates. This eventually
degrades the model's performance on RAG, as multiple copies of the same
irrelevant chunks might be picked up several times.

The fix is to ensure we create a new index each time.

Resolves the issue raised in this discussion:
https://github.com/meta-llama/llama-stack/discussions/995

## Test Plan
Re-ran the getting started guide multiple times and confirmed that it produces the same output each time.

Co-authored-by: Hardik Shah <hjshah@fb.com>
This commit is contained in:
Hardik Shah 2025-02-06 16:12:29 -08:00 committed by GitHub
parent 06e5af1435
commit 28a0fe57cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 9 additions and 7 deletions

View file

@ -89,7 +89,7 @@
"# install a branch of llama stack\n", "# install a branch of llama stack\n",
"import os\n", "import os\n",
"os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n", "os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
"!pip install uv \n", "!pip install uv\n",
"!uv pip install llama-stack" "!uv pip install llama-stack"
] ]
}, },
@ -691,7 +691,7 @@
" from google.colab import userdata\n", " from google.colab import userdata\n",
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n", " os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n", " os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
"except ImportError: \n", "except ImportError:\n",
" print(\"Not in Google Colab environment\")\n", " print(\"Not in Google Colab environment\")\n",
"\n", "\n",
"for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n", "for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n",
@ -1656,6 +1656,7 @@
} }
], ],
"source": [ "source": [
"import uuid\n",
"from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n",
@ -1673,7 +1674,7 @@
" for i, url in enumerate(urls)\n", " for i, url in enumerate(urls)\n",
"]\n", "]\n",
"\n", "\n",
"vector_db_id = \"test-vector-db\"\n", "vector_db_id = f\"test-vector-db-{uuid.uuid4().hex}\"\n",
"client.vector_dbs.register(\n", "client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n", " vector_db_id=vector_db_id,\n",
" embedding_model=\"all-MiniLM-L6-v2\",\n", " embedding_model=\"all-MiniLM-L6-v2\",\n",
@ -3098,7 +3099,7 @@
} }
], ],
"source": [ "source": [
"# NBVAL_SKIP \n", "# NBVAL_SKIP\n",
"print(f\"Getting traces for session_id={session_id}\")\n", "print(f\"Getting traces for session_id={session_id}\")\n",
"import json\n", "import json\n",
"\n", "\n",

View file

@ -173,6 +173,7 @@ Here is an example of a simple RAG (Retrieval Augmented Generation) chatbot agen
```python ```python
import os import os
import uuid
from termcolor import cprint from termcolor import cprint
from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.agent import Agent
@ -214,7 +215,7 @@ documents = [
] ]
# Register a vector database # Register a vector database
vector_db_id = "test-vector-db" vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
client.vector_dbs.register( client.vector_dbs.register(
vector_db_id=vector_db_id, vector_db_id=vector_db_id,
embedding_model="all-MiniLM-L6-v2", embedding_model="all-MiniLM-L6-v2",

View file

@ -297,7 +297,7 @@ def test_override_system_message_behavior(llama_stack_client, agent_config):
You are an expert in composing functions. You are given a question and a set of possible functions. You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you may or may not need to make one or more function/tool calls to achieve the purpose. Based on the question, you may or may not need to make one or more function/tool calls to achieve the purpose.
If none of the function can be used, don't return [], instead answer the question directly without using functions. If the given question lacks the parameters required by the function, If none of the function can be used, don't return [], instead answer the question directly without using functions. If the given question lacks the parameters required by the function,
also point it out. also point it out.
{{ function_description }} {{ function_description }}
""" """
@ -414,7 +414,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config):
) )
for i, url in enumerate(urls) for i, url in enumerate(urls)
] ]
vector_db_id = "test-vector-db" vector_db_id = f"test-vector-db-{uuid4()}"
llama_stack_client.vector_dbs.register( llama_stack_client.vector_dbs.register(
vector_db_id=vector_db_id, vector_db_id=vector_db_id,
embedding_model="all-MiniLM-L6-v2", embedding_model="all-MiniLM-L6-v2",