use agent.inference_api instead of passing host/port again

This commit is contained in:
Hardik Shah 2024-09-06 12:48:08 -07:00
parent 4a70f3d2ba
commit c2b7b462e9
3 changed files with 15 additions and 21 deletions

View file

@@ -31,7 +31,7 @@ from llama_toolchain.tools.builtin import (
SingleMessageBuiltinTool,
)
-from .context_retriever import generate_rag_query
+from .rag.context_retriever import generate_rag_query
from .safety import SafetyException, ShieldRunnerMixin
@@ -665,7 +665,9 @@ class ChatAgent(ShieldRunnerMixin):
# (i.e., no prior turns uploaded an Attachment)
return None, []
-        query = await generate_rag_query(memory.query_generator_config, messages)
+        query = await generate_rag_query(
+            memory.query_generator_config, messages, inference_api=self.inference_api
+        )
tasks = [
self.memory_api.query_documents(
bank_id=bank_id,