mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-05 12:21:52 +00:00
use agent.inference_api instead of passing host/port again
This commit is contained in:
parent
4a70f3d2ba
commit
c2b7b462e9
3 changed files with 15 additions and 21 deletions
|
@@ -31,7 +31,7 @@ from llama_toolchain.tools.builtin import (
|
|||
SingleMessageBuiltinTool,
|
||||
)
|
||||
|
||||
-from .context_retriever import generate_rag_query
|
||||
+from .rag.context_retriever import generate_rag_query
|
||||
from .safety import SafetyException, ShieldRunnerMixin
|
||||
|
||||
|
||||
|
@@ -665,7 +665,9 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
# (i.e., no prior turns uploaded an Attachment)
|
||||
return None, []
|
||||
|
||||
-query = await generate_rag_query(memory.query_generator_config, messages)
|
||||
+query = await generate_rag_query(
|
||||
+memory.query_generator_config, messages, inference_api=self.inference_api
|
||||
+)
|
||||
tasks = [
|
||||
self.memory_api.query_documents(
|
||||
bank_id=bank_id,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue