mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
fix: Update rag examples to use fresh faiss index every time (#998)
# What does this PR do?

Several examples reuse the same faiss index, so running them multiple times fills the index with duplicates. This eventually degrades the model's performance on RAG, because multiple copies of the same irrelevant chunks might be picked up several times. The fix is to ensure we create a new index each time.

Resolves the issue raised in this discussion: https://github.com/meta-llama/llama-stack/discussions/995

## Test Plan

Re-ran the getting started guide multiple times and confirmed that it produces the same output each time.

Co-authored-by: Hardik Shah <hjshah@fb.com>
This commit is contained in:
parent
06e5af1435
commit
28a0fe57cc
3 changed files with 9 additions and 7 deletions
|
@ -89,7 +89,7 @@
|
|||
"# install a branch of llama stack\n",
|
||||
"import os\n",
|
||||
"os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
|
||||
"!pip install uv \n",
|
||||
"!pip install uv\n",
|
||||
"!uv pip install llama-stack"
|
||||
]
|
||||
},
|
||||
|
@ -691,7 +691,7 @@
|
|||
" from google.colab import userdata\n",
|
||||
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
|
||||
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
|
||||
"except ImportError: \n",
|
||||
"except ImportError:\n",
|
||||
" print(\"Not in Google Colab environment\")\n",
|
||||
"\n",
|
||||
"for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n",
|
||||
|
@ -1656,6 +1656,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
|
@ -1673,7 +1674,7 @@
|
|||
" for i, url in enumerate(urls)\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_db_id = \"test-vector-db\"\n",
|
||||
"vector_db_id = f\"test-vector-db-{uuid.uuid4().hex}\"\n",
|
||||
"client.vector_dbs.register(\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" embedding_model=\"all-MiniLM-L6-v2\",\n",
|
||||
|
@ -3098,7 +3099,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP \n",
|
||||
"# NBVAL_SKIP\n",
|
||||
"print(f\"Getting traces for session_id={session_id}\")\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
|
|
|
@ -173,6 +173,7 @@ Here is an example of a simple RAG (Retrieval Augmented Generation) chatbot agen
|
|||
|
||||
```python
|
||||
import os
|
||||
import uuid
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
|
@ -214,7 +215,7 @@ documents = [
|
|||
]
|
||||
|
||||
# Register a vector database
|
||||
vector_db_id = "test-vector-db"
|
||||
vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
|
||||
client.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
|
|
|
@ -414,7 +414,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config):
|
|||
)
|
||||
for i, url in enumerate(urls)
|
||||
]
|
||||
vector_db_id = "test-vector-db"
|
||||
vector_db_id = f"test-vector-db-{uuid4()}"
|
||||
llama_stack_client.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue