# feat: Add a direct (non-agentic) RAG option to the Playground RAG page (#1940)
## What does this PR do?

This PR makes it possible to switch between agentic and non-agentic RAG when running the respective Playground page. When non-agentic RAG is selected, user queries are answered by directly querying the vector DB, augmenting the prompt with the retrieved context, and sending the extended prompt to the model via the Inference API.

## Test Plan

- Launch the Playground and go to the RAG page;
- Select the vector DB ID;
- Adjust other configuration parameters if necessary;
- Set the radio button to Agent-based RAG;
- Send a message to the chat;
- The query will be answered by an agent using the knowledge search tool, as indicated by the output;
- Click the 'Clear Chat' button to make it possible to switch modes;
- Set the radio button to Direct RAG;
- Send a message to the chat again;
- This time, the query will be answered by the model directly, as can be deduced from the reply.
parent c6fa47db6f
commit 40f41af2f7
1 changed file with 88 additions and 15 deletions
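For orientation before the diff: the direct (non-agentic) path added here boils down to two client calls, `tool_runtime.rag_tool.query` and `inference.chat_completion`. Below is a minimal sketch of that round trip. The server URL, model id, and vector DB id are illustrative assumptions, not values taken from this PR.

```python
# Minimal sketch of the direct RAG round trip, mirroring the Playground code
# in this diff. Base URL, model id, and vector DB id are assumed placeholders.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed server address

prompt = "What do the uploaded documents say about retrieval?"

# 1. Query the vector DB through the RAG tool runtime (same call as the diff).
rag_response = client.tool_runtime.rag_tool.query(
    content=prompt,
    vector_db_ids=["rag_vector_db"],  # default collection name from the sidebar
)

# 2. Augment the user query with the retrieved context.
extended_prompt = (
    "Please answer the following query using the context below.\n\n"
    f"CONTEXT:\n{rag_response.content}\n\nQUERY:\n{prompt}"
)

# 3. Send the extended prompt directly to the model via the Inference API.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
    messages=[{"role": "user", "content": extended_prompt}],
    stream=False,
)
print(response.completion_message.content)
```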
@@ -9,6 +9,7 @@ import uuid
 import streamlit as st
 from llama_stack_client import Agent, AgentEventLogger, RAGDocument
 
+from llama_stack.apis.common.content_types import ToolCallDelta
 from llama_stack.distribution.ui.modules.api import llama_stack_api
 from llama_stack.distribution.ui.modules.utils import data_url_from_file
 
@@ -21,11 +22,11 @@ def rag_chat_page():
         st.cache_resource.clear()
 
     def should_disable_input():
-        return "messages" in st.session_state and len(st.session_state.messages) > 0
+        return "displayed_messages" in st.session_state and len(st.session_state.displayed_messages) > 0
 
     with st.sidebar:
         # File/Directory Upload Section
-        st.subheader("Upload Documents")
+        st.subheader("Upload Documents", divider=True)
         uploaded_files = st.file_uploader(
             "Upload file(s) or directory",
             accept_multiple_files=True,
@@ -36,11 +37,11 @@ def rag_chat_page():
             st.success(f"Successfully uploaded {len(uploaded_files)} files")
             # Add memory bank name input field
             vector_db_name = st.text_input(
-                "Vector Database Name",
+                "Document Collection Name",
                 value="rag_vector_db",
-                help="Enter a unique identifier for this vector database",
+                help="Enter a unique identifier for this document collection",
             )
-            if st.button("Create Vector Database"):
+            if st.button("Create Document Collection"):
                 documents = [
                     RAGDocument(
                         document_id=uploaded_file.name,
@@ -71,17 +72,30 @@ def rag_chat_page():
                 )
                 st.success("Vector database created successfully!")
 
-        st.subheader("Configure Agent")
+        st.subheader("RAG Parameters", divider=True)
 
+        rag_mode = st.radio(
+            "RAG mode",
+            ["Direct", "Agent-based"],
+            captions=[
+                "RAG is performed by directly retrieving the information and augmenting the user query",
+                "RAG is performed by an agent activating a dedicated knowledge search tool.",
+            ],
+            on_change=reset_agent_and_chat,
+            disabled=should_disable_input(),
+        )
+
         # select memory banks
         vector_dbs = llama_stack_api.client.vector_dbs.list()
         vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
         selected_vector_dbs = st.multiselect(
-            label="Select Vector Databases",
+            label="Select Document Collections to use in RAG queries",
             options=vector_dbs,
             on_change=reset_agent_and_chat,
             disabled=should_disable_input(),
         )
+
+        st.subheader("Inference Parameters", divider=True)
         available_models = llama_stack_api.client.models.list()
         available_models = [model.identifier for model in available_models if model.model_type == "llm"]
         selected_model = st.selectbox(
@@ -127,9 +141,11 @@ def rag_chat_page():
     # Chat Interface
     if "messages" not in st.session_state:
         st.session_state.messages = []
+    if "displayed_messages" not in st.session_state:
+        st.session_state.displayed_messages = []
 
     # Display chat history
-    for message in st.session_state.messages:
+    for message in st.session_state.displayed_messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
@@ -161,14 +177,17 @@ def rag_chat_page():
             ],
         )
 
+    if rag_mode == "Agent-based":
         agent = create_agent()
 
         if "agent_session_id" not in st.session_state:
             st.session_state["agent_session_id"] = agent.create_session(session_name=f"rag_demo_{uuid.uuid4()}")
 
         session_id = st.session_state["agent_session_id"]
 
-    def process_prompt(prompt):
+    def agent_process_prompt(prompt):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
         # Send the prompt to the agent
         response = agent.create_turn(
             messages=[
@@ -197,11 +216,62 @@ def rag_chat_page():
             message_placeholder.markdown(full_response)
 
         st.session_state.messages.append({"role": "assistant", "content": full_response})
+        st.session_state.displayed_messages.append({"role": "assistant", "content": full_response})
+
+    def direct_process_prompt(prompt):
+        # Add the system prompt in the beginning of the conversation
+        if len(st.session_state.messages) == 0:
+            st.session_state.messages.append({"role": "system", "content": system_prompt})
+
+        # Query the vector DB
+        rag_response = llama_stack_api.client.tool_runtime.rag_tool.query(
+            content=prompt, vector_db_ids=list(selected_vector_dbs)
+        )
+        prompt_context = rag_response.content
+
+        with st.chat_message("assistant"):
+            retrieval_message_placeholder = st.empty()
+            message_placeholder = st.empty()
+            full_response = ""
+            retrieval_response = ""
+
+            # Display the retrieved content
+            retrieval_response += str(prompt_context)
+            retrieval_message_placeholder.info(retrieval_response)
+
+            # Construct the extended prompt
+            extended_prompt = f"Please answer the following query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{prompt}"
+
+            # Run inference directly
+            st.session_state.messages.append({"role": "user", "content": extended_prompt})
+            response = llama_stack_api.client.inference.chat_completion(
+                messages=st.session_state.messages,
+                model_id=selected_model,
+                sampling_params={
+                    "strategy": strategy,
+                },
+                stream=True,
+            )
+
+            # Display assistant response
+            for chunk in response:
+                response_delta = chunk.event.delta
+                if isinstance(response_delta, ToolCallDelta):
+                    retrieval_response += response_delta.tool_call.replace("====", "").strip()
+                    retrieval_message_placeholder.info(retrieval_response)
+                else:
+                    full_response += chunk.event.delta.text
+                    message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(full_response)
+
+        response_dict = {"role": "assistant", "content": full_response, "stop_reason": "end_of_message"}
+        st.session_state.messages.append(response_dict)
+        st.session_state.displayed_messages.append(response_dict)
 
     # Chat input
     if prompt := st.chat_input("Ask a question about your documents"):
         # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": prompt})
+        st.session_state.displayed_messages.append({"role": "user", "content": prompt})
 
         # Display user message
         with st.chat_message("user"):
@@ -214,7 +284,10 @@ def rag_chat_page():
         st.rerun()
 
     if "prompt" in st.session_state and st.session_state.prompt is not None:
-        process_prompt(st.session_state.prompt)
+        if rag_mode == "Agent-based":
+            agent_process_prompt(st.session_state.prompt)
+        else:  # rag_mode == "Direct"
+            direct_process_prompt(st.session_state.prompt)
         st.session_state.prompt = None
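For contrast with the direct path, here is a sketch of what the Agent-based mode does under the hood: the agent owns the retrieval decision via the builtin knowledge_search tool, instead of the page querying the vector DB itself. The agent configuration shown (server URL, model id, instructions, tool args) is an assumption for illustration; the PR's own `create_agent()` helper is not part of the hunks above.

```python
# Sketch of the Agent-based mode: the agent decides when to invoke the builtin
# knowledge_search RAG tool. All configuration values here are assumptions.
import uuid

from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed server address

agent = Agent(
    client,
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
    instructions="Use the knowledge_search tool to answer questions.",  # assumed
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {"vector_db_ids": ["rag_vector_db"]},  # assumed collection id
        }
    ],
)

# Same session/turn calls as the diff's agent_process_prompt path.
session_id = agent.create_session(session_name=f"rag_demo_{uuid.uuid4()}")
turn = agent.create_turn(
    messages=[{"role": "user", "content": "What do my documents say about retrieval?"}],
    session_id=session_id,
    stream=True,
)
for log in AgentEventLogger().log(turn):
    log.print()
```

A design note on the diff itself: it splits the chat state into `st.session_state.messages` (the raw conversation sent to the model, including the system prompt and the context-extended queries) and `st.session_state.displayed_messages` (what the user sees), which keeps the augmented prompts out of the rendered chat history.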