diff --git a/llama_toolchain/agentic_system/api/datatypes.py b/llama_toolchain/agentic_system/api/datatypes.py
index e529e7e6a..dbebb9fec 100644
--- a/llama_toolchain/agentic_system/api/datatypes.py
+++ b/llama_toolchain/agentic_system/api/datatypes.py
@@ -10,13 +10,13 @@ from typing import Any, Dict, List, Literal, Optional, Union
from llama_models.schema_utils import json_schema_type
-from pydantic import BaseModel, ConfigDict, Field, validator
+from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Annotated
from llama_toolchain.common.deployment_types import * # noqa: F403
from llama_toolchain.inference.api import * # noqa: F403
-from llama_toolchain.safety.api.datatypes import * # noqa: F403
-from llama_toolchain.memory.api.datatypes import * # noqa: F403
+from llama_toolchain.safety.api import * # noqa: F403
+from llama_toolchain.memory.api import * # noqa: F403
@json_schema_type
@@ -25,31 +25,81 @@ class Attachment(BaseModel):
mime_type: str
-class AgenticSystemBuiltinTool(BuiltinTool):
+class AgenticSystemTool(Enum):
+ # Closed set of tool kinds an agent can be configured with. Each value is
+ # reused as the `type` literal of the matching *ToolDefinition model.
+ brave_search = "brave_search"
+ wolfram_alpha = "wolfram_alpha"
+ photogen = "photogen"
+ code_interpreter = "code_interpreter"
+
+ function_call = "function_call"
memory = "memory"
-@json_schema_type
-class AgenticSystemToolDefinition(BaseModel):
- tool_name: Union[AgenticSystemBuiltinTool, str]
- description: Optional[str] = None
- parameters: Optional[Dict[str, ToolParamDefinition]] = None
-
- @validator("tool_name", pre=True)
- @classmethod
- def validate_field(cls, v):
- if isinstance(v, str):
- try:
- return AgenticSystemBuiltinTool(v)
- except ValueError:
- return v
- return v
-
- execution_config: Optional[RestAPIExecutionConfig] = None
+class ToolDefinitionCommon(BaseModel):
+ # Base model inherited by every concrete tool definition in this module.
+ # Both shield lists default to an empty list.
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
@json_schema_type
+class BraveSearchToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "brave_search"; `type` is the
+ # discriminator used by the AgenticSystemToolDefinition union.
+ type: Literal[AgenticSystemTool.brave_search.value] = (
+ AgenticSystemTool.brave_search.value
+ )
+ # Optional REST execution config; None when not supplied.
+ remote_execution: Optional[RestAPIExecutionConfig] = None
+
+
+@json_schema_type
+class WolframAlphaToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "wolfram_alpha"; `type` is the
+ # discriminator used by the AgenticSystemToolDefinition union.
+ type: Literal[AgenticSystemTool.wolfram_alpha.value] = (
+ AgenticSystemTool.wolfram_alpha.value
+ )
+ # Optional REST execution config; None when not supplied.
+ remote_execution: Optional[RestAPIExecutionConfig] = None
+
+
+@json_schema_type
+class PhotogenToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "photogen"; `type` is the
+ # discriminator used by the AgenticSystemToolDefinition union.
+ type: Literal[AgenticSystemTool.photogen.value] = AgenticSystemTool.photogen.value
+ # Optional REST execution config; None when not supplied.
+ remote_execution: Optional[RestAPIExecutionConfig] = None
+
+
+@json_schema_type
+class CodeInterpreterToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "code_interpreter"; `type` is the
+ # discriminator used by the AgenticSystemToolDefinition union.
+ type: Literal[AgenticSystemTool.code_interpreter.value] = (
+ AgenticSystemTool.code_interpreter.value
+ )
+ # Defaults to True.
+ # NOTE(review): presumably toggles in-process execution of generated code
+ # as opposed to `remote_execution` -- confirm against the implementation.
+ enable_inline_code_execution: bool = True
+ # Optional REST execution config; None when not supplied.
+ remote_execution: Optional[RestAPIExecutionConfig] = None
+
+
+@json_schema_type
+class FunctionCallToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "function_call"; `type` is the
+ # discriminator used by the AgenticSystemToolDefinition union.
+ # NOTE(review): unlike the removed definition (which had `tool_name`), no
+ # field here names the function, so two function_call tools cannot be
+ # told apart by name -- confirm whether a name field is intended.
+ type: Literal[AgenticSystemTool.function_call.value] = (
+ AgenticSystemTool.function_call.value
+ )
+ # Required: description of the function.
+ description: str
+ # Required: parameter definitions keyed by a string.
+ # NOTE(review): the key is presumably the parameter name -- confirm.
+ parameters: Dict[str, ToolParamDefinition]
+ # Optional REST execution config; None when not supplied.
+ remote_execution: Optional[RestAPIExecutionConfig] = None
+
+
+@json_schema_type
+class MemoryToolDefinition(ToolDefinitionCommon):
+ # Tool definition tagged with type == "memory". Unlike the other tool
+ # definitions in this module it declares no `remote_execution` field.
+ type: Literal[AgenticSystemTool.memory.value] = AgenticSystemTool.memory.value
+ # Memory banks attached to this tool; defaults to an empty list.
+ memory_banks: List[MemoryBank] = Field(default_factory=list)
+
+
+# Tagged union over all concrete tool definitions. The `type` field of each
+# member selects which model is used. This is a type alias, not a class.
+AgenticSystemToolDefinition = Annotated[
+ Union[
+ BraveSearchToolDefinition,
+ WolframAlphaToolDefinition,
+ PhotogenToolDefinition,
+ CodeInterpreterToolDefinition,
+ FunctionCallToolDefinition,
+ MemoryToolDefinition,
+ ],
+ Field(discriminator="type"),
+]
+
+
class StepCommon(BaseModel):
turn_id: str
step_id: str
@@ -136,27 +186,45 @@ class Session(BaseModel):
started_at: datetime
-@json_schema_type
-class MemoryConfig(BaseModel):
- memory_bank_id: str
+class MemoryBankConfigCommon(BaseModel):
+ # Base model for the per-agent memory bank configs below; every variant
+ # carries the id of the bank it refers to.
+ bank_id: str
- # this configuration can hold other information we may want to configure
- # how will the agent use the memory bank API?
- #
- #
+
+class VectorMemoryBankConfig(MemoryBankConfigCommon):
+ # Variant tagged with type == MemoryBankType.vector.value; adds no fields
+ # beyond the inherited bank_id.
+ type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
+
+
+class KeyValueMemoryBankConfig(MemoryBankConfigCommon):
+ # Variant tagged with type == MemoryBankType.keyvalue.value; `keys` is
+ # required (no default).
+ type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
+ keys: List[str] # what keys to focus on
+
+
+class KeywordMemoryBankConfig(MemoryBankConfigCommon):
+ # Variant tagged with type == MemoryBankType.keyword.value; adds no fields
+ # beyond the inherited bank_id.
+ type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
+
+
+class GraphMemoryBankConfig(MemoryBankConfigCommon):
+ # Variant tagged with type == MemoryBankType.graph.value; `entities` is
+ # required (no default).
+ type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
+ entities: List[str] # what entities to focus on
+
+
+# Tagged union of the per-agent memory bank configs, discriminated on `type`.
+# NOTE(review): the memory API module defines models with these same names
+# (MemoryBankConfig, VectorMemoryBankConfig, ...). If they are re-exported by
+# the star import from llama_toolchain.memory.api, the definitions in this
+# module shadow them here -- confirm that is intended.
+MemoryBankConfig = Annotated[
+ Union[
+ VectorMemoryBankConfig,
+ KeyValueMemoryBankConfig,
+ KeywordMemoryBankConfig,
+ GraphMemoryBankConfig,
+ ],
+ Field(discriminator="type"),
+]
class AgentConfigCommon(BaseModel):
sampling_params: Optional[SamplingParams] = SamplingParams()
- memory_configs: Optional[List[MemoryConfig]] = Field(default_factory=list)
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
- # if you completely want to replace the messages prefixed by the system,
- # this is debug only
- debug_prefix_messages: Optional[List[Message]] = Field(default_factory=list)
-
tools: Optional[List[AgenticSystemToolDefinition]] = Field(default_factory=list)
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
tool_prompt_format: Optional[ToolPromptFormat] = Field(
@@ -168,6 +236,7 @@ class AgentConfigCommon(BaseModel):
class AgentConfig(AgentConfigCommon):
+ # Full agent configuration: the AgentConfigCommon fields plus the required
+ # model and instructions, and an optional list of memory bank configs.
model: str
instructions: str
+ # Defaults to an empty list.
+ memory_bank_configs: Optional[List[MemoryBankConfig]] = Field(default_factory=list)
class AgentConfigOverridablePerTurn(AgentConfigCommon):
diff --git a/llama_toolchain/agentic_system/api/endpoints.py b/llama_toolchain/agentic_system/api/endpoints.py
index 6ef35c30e..663edeb8d 100644
--- a/llama_toolchain/agentic_system/api/endpoints.py
+++ b/llama_toolchain/agentic_system/api/endpoints.py
@@ -10,21 +10,9 @@ from typing import Protocol
from llama_models.schema_utils import json_schema_type, webmethod
-@json_schema_type
-class AgenticSystemCreateRequest(BaseModel):
- agent_config: AgentConfig
-
-
@json_schema_type
class AgenticSystemCreateResponse(BaseModel):
- # TODO: rename this to agent_id
- system_id: str
-
-
-@json_schema_type
-class AgenticSystemSessionCreateRequest(BaseModel):
- system_id: str
- session_name: str
+ # Identifier of the created agent; session and turn creation take it as
+ # their `agent_id`.
+ agent_id: str
@json_schema_type
@@ -33,8 +21,8 @@ class AgenticSystemSessionCreateResponse(BaseModel):
@json_schema_type
-class AgenticSystemTurnCreateRequest(BaseModel, AgentConfigOverridablePerTurn):
- system_id: str
+class AgenticSystemTurnCreateRequest(AgentConfigOverridablePerTurn):
+ agent_id: str
session_id: str
# TODO: figure out how we can simplify this and make why
@@ -67,7 +55,7 @@ class AgenticSystem(Protocol):
+ # Create an agent from the given AgentConfig (passed directly, without a
+ # request wrapper) and return a response carrying its agent_id.
@webmethod(route="/agentic_system/create")
async def create_agentic_system(
self,
- request: AgenticSystemCreateRequest,
+ agent_config: AgentConfig,
) -> AgenticSystemCreateResponse: ...
@webmethod(route="/agentic_system/turn/create")
@@ -91,7 +79,8 @@ class AgenticSystem(Protocol):
+ # Create a named session for an existing agent. Both values are plain
+ # parameters rather than fields of a request model.
@webmethod(route="/agentic_system/session/create")
async def create_agentic_system_session(
self,
- request: AgenticSystemSessionCreateRequest,
+ agent_id: str,
+ session_name: str,
) -> AgenticSystemSessionCreateResponse: ...
@webmethod(route="/agentic_system/session/get")
diff --git a/llama_toolchain/agentic_system/client.py b/llama_toolchain/agentic_system/client.py
index 3f8e63245..e3f7add44 100644
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_toolchain/agentic_system/client.py
@@ -26,9 +26,7 @@ from llama_toolchain.agentic_system.event_logger import EventLogger
from .api import (
AgentConfig,
AgenticSystem,
- AgenticSystemCreateRequest,
AgenticSystemCreateResponse,
- AgenticSystemSessionCreateRequest,
AgenticSystemSessionCreateResponse,
AgenticSystemToolDefinition,
AgenticSystemTurnCreateRequest,
@@ -127,27 +125,23 @@ async def run_main(host: str, port: int):
),
]
- create_request = AgenticSystemCreateRequest(
+ # The AgentConfig is passed to create_agentic_system directly (no request
+ # wrapper). debug_prefix_messages is not passed: AgentConfig no longer
+ # declares that field.
+ agent_config = AgentConfig(
model="Meta-Llama3.1-8B-Instruct",
- agent_config=AgentConfig(
- instructions="You are a helpful assistant",
- sampling_params=SamplingParams(),
- available_tools=tool_definitions,
- input_shields=[],
- output_shields=[],
- debug_prefix_messages=[],
- tool_prompt_format=ToolPromptFormat.json,
- ),
+ instructions="You are a helpful assistant",
+ sampling_params=SamplingParams(),
+ tools=tool_definitions,
+ input_shields=[],
+ output_shields=[],
+ tool_prompt_format=ToolPromptFormat.json,
)
- create_response = await api.create_agentic_system(create_request)
+ create_response = await api.create_agentic_system(agent_config)
print(create_response)
session_response = await api.create_agentic_system_session(
- AgenticSystemSessionCreateRequest(
- system_id=create_response.system_id,
- session_name="test_session",
- )
+ agent_id=create_response.agent_id,
+ session_name="test_session",
)
print(session_response)
@@ -162,7 +156,7 @@ async def run_main(host: str, port: int):
cprint(f"User> {content}", color="blue")
iterator = api.create_agentic_system_turn(
AgenticSystemTurnCreateRequest(
- system_id=create_response.system_id,
+ agent_id=create_response.agent_id,
session_id=session_response.session_id,
messages=[
UserMessage(content=content),
diff --git a/llama_toolchain/agentic_system/meta_reference/agent_instance.py b/llama_toolchain/agentic_system/meta_reference/agent_instance.py
index 770bd8d1a..947b94a0f 100644
--- a/llama_toolchain/agentic_system/meta_reference/agent_instance.py
+++ b/llama_toolchain/agentic_system/meta_reference/agent_instance.py
@@ -8,7 +8,7 @@
import copy
import uuid
from datetime import datetime
-from typing import AsyncGenerator, List, Optional
+from typing import AsyncGenerator, List
from llama_models.llama3.api.datatypes import ToolPromptFormat
@@ -326,7 +326,7 @@ class ChatAgent(ShieldRunnerMixin):
req = ChatCompletionRequest(
model=self.agent_config.model,
messages=input_messages,
- tools=self.agent_config.available_tools,
+ tools=self.agent_config.tools,
tool_prompt_format=self.agent_config.tool_prompt_format,
stream=True,
sampling_params=sampling_params,
diff --git a/llama_toolchain/agentic_system/meta_reference/agentic_system.py b/llama_toolchain/agentic_system/meta_reference/agentic_system.py
index 9078b5222..d89058abe 100644
--- a/llama_toolchain/agentic_system/meta_reference/agentic_system.py
+++ b/llama_toolchain/agentic_system/meta_reference/agentic_system.py
@@ -24,7 +24,7 @@ from llama_toolchain.tools.builtin import (
)
from llama_toolchain.tools.safety import with_safety
-from .agent_instance import AgentInstance, ChatAgent
+from .agent_instance import ChatAgent
from .config import MetaReferenceImplConfig
@@ -71,11 +71,11 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
self,
- request: AgenticSystemCreateRequest,
+ agent_config: AgentConfig,
) -> AgenticSystemCreateResponse:
+ # The AgenticSystem protocol passes the AgentConfig directly; the
+ # AgenticSystemCreateRequest wrapper has been removed from the API.
- system_id = str(uuid.uuid4())
+ agent_id = str(uuid.uuid4())
builtin_tools = []
- cfg = request.agent_config
+ cfg = agent_config
- for dfn in cfg.available_tools:
+ # NOTE(review): the tool definitions now carry a `type` discriminator and
+ # declare no `tool_name`; the `dfn.tool_name` checks in this loop need to
+ # be updated accordingly.
+ for dfn in cfg.tools:
if isinstance(dfn.tool_name, BuiltinTool):
if dfn.tool_name == BuiltinTool.wolfram_alpha:
key = self.config.wolfram_api_key
@@ -102,7 +102,7 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
)
)
- AGENT_INSTANCES_BY_ID[system_id] = ChatAgent(
+ AGENT_INSTANCES_BY_ID[agent_id] = ChatAgent(
agent_config=cfg,
inference_api=self.inference_api,
safety_api=self.safety_api,
@@ -111,16 +111,16 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
)
return AgenticSystemCreateResponse(
- system_id=system_id,
+ agent_id=agent_id,
)
async def create_agentic_system_session(
self,
- request: AgenticSystemSessionCreateRequest,
+ agent_id: str,
+ session_name: str,
) -> AgenticSystemSessionCreateResponse:
+ # Signature mirrors AgenticSystem.create_agentic_system_session, which
+ # takes agent_id and session_name directly; the request model that used
+ # to carry them has been removed from the API.
- system_id = request.system_id
- assert system_id in AGENT_INSTANCES_BY_ID, f"System {system_id} not found"
- agent = AGENT_INSTANCES_BY_ID[system_id]
+ assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
+ agent = AGENT_INSTANCES_BY_ID[agent_id]
- session = agent.create_session(request.session_name)
+ session = agent.create_session(session_name)
return AgenticSystemSessionCreateResponse(
@@ -131,9 +131,9 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
self,
request: AgenticSystemTurnCreateRequest,
) -> AsyncGenerator:
- system_id = request.system_id
- assert system_id in AGENT_INSTANCES_BY_ID, f"System {system_id} not found"
- agent = AGENT_INSTANCES_BY_ID[system_id]
+ agent_id = request.agent_id
+ assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
+ agent = AGENT_INSTANCES_BY_ID[agent_id]
assert (
request.session_id in agent.sessions
diff --git a/llama_toolchain/agentic_system/meta_reference/execute_with_custom_tools.py b/llama_toolchain/agentic_system/meta_reference/execute_with_custom_tools.py
index 4729d35a7..2d0068894 100644
--- a/llama_toolchain/agentic_system/meta_reference/execute_with_custom_tools.py
+++ b/llama_toolchain/agentic_system/meta_reference/execute_with_custom_tools.py
@@ -19,7 +19,7 @@ from llama_toolchain.inference.api import Message
async def execute_with_custom_tools(
system: AgenticSystem,
- system_id: str,
+ agent_id: str,
session_id: str,
messages: List[Message],
custom_tools: List[Any],
@@ -35,7 +35,7 @@ async def execute_with_custom_tools(
n_iter += 1
request = AgenticSystemTurnCreateRequest(
- system_id=system_id,
+ agent_id=agent_id,
session_id=session_id,
messages=current_messages,
stream=stream,
diff --git a/llama_toolchain/agentic_system/utils.py b/llama_toolchain/agentic_system/utils.py
index f146402c7..1ac05ce73 100644
--- a/llama_toolchain/agentic_system/utils.py
+++ b/llama_toolchain/agentic_system/utils.py
@@ -14,12 +14,7 @@ from llama_models.llama3.api.datatypes import (
ToolPromptFormat,
)
-from llama_toolchain.agentic_system.api import (
- AgentConfig,
- AgenticSystemCreateRequest,
- AgenticSystemSessionCreateRequest,
- AgenticSystemToolDefinition,
-)
+from llama_toolchain.agentic_system.api import AgentConfig, AgenticSystemToolDefinition
from llama_toolchain.agentic_system.client import AgenticSystemClient
from llama_toolchain.agentic_system.meta_reference.execute_with_custom_tools import (
@@ -32,9 +27,9 @@ from llama_toolchain.safety.api.datatypes import BuiltinShield, ShieldDefinition
class AgenticSystemClientWrapper:
- def __init__(self, api, system_id, custom_tools):
+ def __init__(self, api, agent_id, custom_tools):
self.api = api
- self.system_id = system_id
+ self.agent_id = agent_id
self.custom_tools = custom_tools
self.session_id = None
@@ -43,10 +38,8 @@ class AgenticSystemClientWrapper:
name = f"Session-{uuid.uuid4()}"
response = await self.api.create_agentic_system_session(
- AgenticSystemSessionCreateRequest(
- system_id=self.system_id,
- session_name=name,
- )
+ agent_id=self.agent_id,
+ session_name=name,
)
self.session_id = response.session_id
return self.session_id
@@ -54,7 +47,7 @@ class AgenticSystemClientWrapper:
async def run(self, messages: List[Message], stream: bool = True):
async for chunk in execute_with_custom_tools(
self.api,
- self.system_id,
+ self.agent_id,
self.session_id,
messages,
self.custom_tools,
@@ -98,29 +91,27 @@ async def get_agent_system_instance(
ShieldDefinition(shield_type=BuiltinShield.injection_shield),
]
- create_request = AgenticSystemCreateRequest(
+ agent_config = AgentConfig(
model=model,
- agent_config=AgentConfig(
- instructions="You are a helpful assistant",
- available_tools=tool_definitions,
- input_shields=(
- []
- if disable_safety
- else [
- ShieldDefinition(shield_type=BuiltinShield.llama_guard),
- ShieldDefinition(shield_type=BuiltinShield.jailbreak_shield),
- ]
- ),
- output_shields=(
- []
- if disable_safety
- else [
- ShieldDefinition(shield_type=BuiltinShield.llama_guard),
- ]
- ),
- sampling_params=SamplingParams(),
- tool_prompt_format=tool_prompt_format,
+ instructions="You are a helpful assistant",
+ tools=tool_definitions,
+ input_shields=(
+ []
+ if disable_safety
+ else [
+ ShieldDefinition(shield_type=BuiltinShield.llama_guard),
+ ShieldDefinition(shield_type=BuiltinShield.jailbreak_shield),
+ ]
),
+ output_shields=(
+ []
+ if disable_safety
+ else [
+ ShieldDefinition(shield_type=BuiltinShield.llama_guard),
+ ]
+ ),
+ sampling_params=SamplingParams(),
+ tool_prompt_format=tool_prompt_format,
)
- create_response = await api.create_agentic_system(create_request)
- return AgenticSystemClientWrapper(api, create_response.system_id, custom_tools)
+ create_response = await api.create_agentic_system(agent_config)
+ return AgenticSystemClientWrapper(api, create_response.agent_id, custom_tools)
diff --git a/llama_toolchain/memory/api/datatypes.py b/llama_toolchain/memory/api/datatypes.py
index 878090c46..756f351d8 100644
--- a/llama_toolchain/memory/api/datatypes.py
+++ b/llama_toolchain/memory/api/datatypes.py
@@ -3,23 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-
-from typing import Any, Dict
-
-from llama_models.schema_utils import json_schema_type
-
-from pydantic import BaseModel
-
-
-@json_schema_type
-class MemoryBank(BaseModel):
- memory_bank_id: str
- memory_bank_name: str
-
-
-@json_schema_type
-class MemoryBankDocument(BaseModel):
- document_id: str
- content: bytes
- metadata: Dict[str, Any]
- mime_type: str
diff --git a/llama_toolchain/memory/api/endpoints.py b/llama_toolchain/memory/api/endpoints.py
index 810d821f7..29c2c889e 100644
--- a/llama_toolchain/memory/api/endpoints.py
+++ b/llama_toolchain/memory/api/endpoints.py
@@ -6,76 +6,132 @@
from typing import List, Protocol
-from llama_models.llama3.api.datatypes import InterleavedTextMedia
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_models.schema_utils import webmethod
from .datatypes import * # noqa: F403
@json_schema_type
-class RetrieveMemoryDocumentsRequest(BaseModel):
- query: InterleavedTextMedia
- bank_ids: str
+class MemoryBankDocument(BaseModel):
+ # A document stored in a memory bank. All four fields are required.
+ document_id: str
+ # Either inline content or a URL.
+ # NOTE(review): a URL presumably points at where the content can be
+ # fetched -- confirm.
+ content: InterleavedTextMedia | URL
+ mime_type: str
+ metadata: Dict[str, Any]
+
+
+class Chunk(BaseModel):
+ # One element of QueryDocumentsResponse.chunks.
+ # NOTE(review): not decorated with @json_schema_type, unlike the response
+ # model that contains it -- confirm this is intentional.
+ content: InterleavedTextMedia
+ token_count: int
@json_schema_type
-class RetrieveMemoryDocumentsResponse(BaseModel):
- documents: List[MemoryBankDocument]
+class QueryDocumentsResponse(BaseModel):
+ # Return type of Memory.query_documents.
+ # NOTE(review): presumably scores[i] is the score of chunks[i]; nothing
+ # here enforces that the two lists have equal length -- confirm.
+ chunks: List[Chunk]
scores: List[float]
+@json_schema_type
+class MemoryBankType(Enum):
+ # Kinds of memory bank. Each value is reused as the `type` literal of the
+ # matching *MemoryBankConfig model.
+ vector = "vector"
+ keyvalue = "keyvalue"
+ keyword = "keyword"
+ graph = "graph"
+
+
+class VectorMemoryBankConfig(BaseModel):
+ # Config variant tagged with type == "vector"; `embedding_model` is
+ # required (no default).
+ type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
+ embedding_model: str
+
+
+class KeyValueMemoryBankConfig(BaseModel):
+ # Config variant tagged with type == "keyvalue"; carries no other fields.
+ type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
+
+
+class KeywordMemoryBankConfig(BaseModel):
+ # Config variant tagged with type == "keyword"; carries no other fields.
+ type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
+
+
+class GraphMemoryBankConfig(BaseModel):
+ # Config variant tagged with type == "graph"; carries no other fields.
+ type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
+
+
+# Tagged union of the memory bank config variants, discriminated on `type`.
+# Used as the type of MemoryBank.config and of the `config` argument of
+# Memory.create_memory_bank.
+MemoryBankConfig = Annotated[
+ Union[
+ VectorMemoryBankConfig,
+ KeyValueMemoryBankConfig,
+ KeywordMemoryBankConfig,
+ GraphMemoryBankConfig,
+ ],
+ Field(discriminator="type"),
+]
+
+
+@json_schema_type
+class MemoryBank(BaseModel):
+ # Description of a memory bank: its id, name and type-specific config.
+ # Returned by the create/get/list methods of the Memory protocol.
+ bank_id: str
+ name: str
+ config: MemoryBankConfig
+ # if there's a pre-existing store which obeys the MemoryBank REST interface
+ url: Optional[URL] = None
+
+
class Memory(Protocol):
+ # Protocol for the memory API: bank management under /memory_banks/* and
+ # document operations on a single bank under /memory_bank/*.
+
+ # Create a bank from a name and a type-specific config and return its
+ # description; `url` is optional and defaults to None.
@webmethod(route="/memory_banks/create")
def create_memory_bank(
self,
- bank_id: str,
- bank_name: str,
- embedding_model: str,
- documents: List[MemoryBankDocument],
- ) -> None: ...
+ name: str,
+ config: MemoryBankConfig,
+ url: Optional[URL] = None,
+ ) -> MemoryBank: ...
- @webmethod(route="/memory_banks/list")
- def get_memory_banks(self) -> List[MemoryBank]: ...
+ # List memory banks; registered with method="GET".
+ @webmethod(route="/memory_banks/list", method="GET")
+ def list_memory_banks(self) -> List[MemoryBank]: ...
+ # Look up a single bank by id.
@webmethod(route="/memory_banks/get")
- def get_memory_bank(self, bank_id: str) -> List[MemoryBank]: ...
+ def get_memory_bank(self, bank_id: str) -> MemoryBank: ...
- @webmethod(route="/memory_banks/drop")
- def delete_memory_bank(
+ # Drop a bank by id; registered with method="DELETE". Returns a str.
+ # NOTE(review): the meaning of the returned string is not visible here.
+ @webmethod(route="/memory_banks/drop", method="DELETE")
+ def drop_memory_bank(
self,
bank_id: str,
) -> str: ...
+ # Insert documents into the given bank.
@webmethod(route="/memory_bank/insert")
- def insert_memory_documents(
+ def insert_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
) -> None: ...
+ # Update documents in the given bank.
@webmethod(route="/memory_bank/update")
- def update_memory_documents(
+ def update_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
) -> None: ...
- @webmethod(route="/memory_bank/get")
- def retrieve_memory_documents(
- self,
- request: RetrieveMemoryDocumentsRequest,
- ) -> List[MemoryBankDocument]: ...
-
- @webmethod(route="/memory_bank/get")
- def get_memory_documents(
+ # Query one bank; returns chunks and scores. `params` is an optional
+ # free-form dict and defaults to None.
+ @webmethod(route="/memory_bank/query")
+ def query_documents(
self,
bank_id: str,
- document_uuids: List[str],
- ) -> List[MemoryBankDocument]: ...
+ query: InterleavedTextMedia,
+ params: Optional[Dict[str, Any]] = None,
+ ) -> QueryDocumentsResponse: ...
- @webmethod(route="/memory_bank/delete")
- def delete_memory_documents(
+ # Fetch documents of one bank by their ids.
+ @webmethod(route="/memory_bank/documents/get")
+ def get_documents(
self,
bank_id: str,
- document_uuids: List[str],
- ) -> List[str]: ...
+ document_ids: List[str],
+ ) -> List[MemoryBankDocument]: ...
+
+ # Delete documents of one bank by their ids; returns nothing.
+ @webmethod(route="/memory_bank/documents/delete")
+ def delete_documents(
+ self,
+ bank_id: str,
+ document_ids: List[str],
+ ) -> None: ...
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
index f8dab9ec3..aabf8aa84 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-21 14:16:38.313950"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-23 06:36:10.417114"
},
"servers": [
{
@@ -29,49 +29,6 @@
}
],
"paths": {
- "/agentic_system/memory_bank/attach": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "AgenticSystem"
- ],
- "parameters": [
- {
- "name": "agent_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "session_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- }
- },
- "required": true
- }
- }
- },
"/inference/batch_chat_completion": {
"post": {
"responses": {
@@ -258,7 +215,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/AgenticSystemCreateRequest"
+ "$ref": "#/components/schemas/AgentConfig"
}
}
},
@@ -267,7 +224,7 @@
}
},
"/agentic_system/session/create": {
- "post": {
+ "get": {
"responses": {
"200": {
"description": "OK",
@@ -283,17 +240,24 @@
"tags": [
"AgenticSystem"
],
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/AgenticSystemSessionCreateRequest"
- }
+ "parameters": [
+ {
+ "name": "agent_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
}
},
- "required": true
- }
+ {
+ "name": "session_name",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
}
},
"/agentic_system/turn/create": {
@@ -383,23 +347,22 @@
"post": {
"responses": {
"200": {
- "description": "OK"
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBank"
+ }
+ }
+ }
}
},
"tags": [
- "MemoryBanks"
+ "Memory"
],
"parameters": [
{
- "name": "bank_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "bank_name",
+ "name": "name",
"in": "query",
"required": true,
"schema": {
@@ -411,10 +374,7 @@
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/MemoryBankDocument"
- }
+ "$ref": "#/components/schemas/CreateMemoryBankRequest"
}
}
},
@@ -526,6 +486,41 @@
]
}
},
+ "/memory_bank/documents/delete": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "Memory"
+ ],
+ "parameters": [
+ {
+ "name": "bank_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/memory_banks/drop": {
"delete": {
"responses": {
@@ -541,7 +536,7 @@
}
},
"tags": [
- "MemoryBanks"
+ "Memory"
],
"parameters": [
{
@@ -555,91 +550,6 @@
]
}
},
- "/memory_bank/delete": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/jsonl": {
- "schema": {
- "type": "string"
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "bank_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- }
- },
- "required": true
- }
- }
- },
- "/agentic_system/memory_bank/detach": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "AgenticSystem"
- ],
- "parameters": [
- {
- "name": "agent_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "session_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- }
- },
- "required": true
- }
- }
- },
"/evaluate/question_answering/": {
"post": {
"responses": {
@@ -927,6 +837,48 @@
]
}
},
+ "/memory_bank/documents/get": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBankDocument"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Memory"
+ ],
+ "parameters": [
+ {
+ "name": "bank_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/evaluate/job/artifacts": {
"get": {
"responses": {
@@ -1099,7 +1051,7 @@
"200": {
"description": "OK",
"content": {
- "application/jsonl": {
+ "application/json": {
"schema": {
"$ref": "#/components/schemas/MemoryBank"
}
@@ -1108,7 +1060,7 @@
}
},
"tags": [
- "MemoryBanks"
+ "Memory"
],
"parameters": [
{
@@ -1122,68 +1074,6 @@
]
}
},
- "/memory_banks/list": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/jsonl": {
- "schema": {
- "$ref": "#/components/schemas/MemoryBank"
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": []
- }
- },
- "/memory_bank/get": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/jsonl": {
- "schema": {
- "$ref": "#/components/schemas/MemoryBankDocument"
- }
- }
- }
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "bank_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- }
- },
- "required": true
- }
- }
- },
"/runs/metrics": {
"get": {
"responses": {
@@ -1328,7 +1218,7 @@
}
},
"tags": [
- "MemoryBanks"
+ "Memory"
],
"parameters": [
{
@@ -1404,6 +1294,26 @@
"parameters": []
}
},
+ "/memory_banks/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/MemoryBank"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Memory"
+ ],
+ "parameters": []
+ }
+ },
"/logging/log_messages": {
"post": {
"responses": {
@@ -1480,6 +1390,45 @@
}
}
},
+ "/memory_bank/query": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryDocumentsResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Memory"
+ ],
+ "parameters": [
+ {
+ "name": "bank_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryDocumentsRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/reward_scoring/score": {
"post": {
"responses": {
@@ -1570,6 +1519,41 @@
}
}
},
+ "/memory_bank/update": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "Memory"
+ ],
+ "parameters": [
+ {
+ "name": "bank_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MemoryBankDocument"
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/experiments/update": {
"post": {
"responses": {
@@ -1600,41 +1584,6 @@
}
}
},
- "/memory_bank/update": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "bank_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/MemoryBankDocument"
- }
- }
- }
- },
- "required": true
- }
- }
- },
"/runs/update": {
"post": {
"responses": {
@@ -1699,22 +1648,6 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
- "Attachment": {
- "type": "object",
- "properties": {
- "url": {
- "$ref": "#/components/schemas/URL"
- },
- "mime_type": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "url",
- "mime_type"
- ]
- },
"BatchChatCompletionRequest": {
"type": "object",
"properties": {
@@ -1746,12 +1679,18 @@
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
- "available_tools": {
+ "tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
},
+ "tool_choice": {
+ "$ref": "#/components/schemas/ToolChoice"
+ },
+ "tool_prompt_format": {
+ "$ref": "#/components/schemas/ToolPromptFormat"
+ },
"logprobs": {
"type": "object",
"properties": {
@@ -1789,20 +1728,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -1880,20 +1809,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -1995,6 +1914,13 @@
"arguments"
]
},
+ "ToolChoice": {
+ "type": "string",
+ "enum": [
+ "auto",
+ "required"
+ ]
+ },
"ToolDefinition": {
"type": "object",
"properties": {
@@ -2041,6 +1967,15 @@
"param_type"
]
},
+ "ToolPromptFormat": {
+ "type": "string",
+ "enum": [
+ "json",
+ "function_tag"
+ ],
+ "title": "This Enum refers to the prompt format for calling zero shot tools",
+ "description": "`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`"
+ },
"ToolResponseMessage": {
"type": "object",
"properties": {
@@ -2066,20 +2001,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -2093,11 +2018,6 @@
"content"
]
},
- "URL": {
- "type": "string",
- "format": "uri",
- "pattern": "^(https?://|file://|data:)"
- },
"UserMessage": {
"type": "object",
"properties": {
@@ -2110,20 +2030,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -2163,20 +2073,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -2244,12 +2144,18 @@
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
- "available_tools": {
+ "tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
},
+ "tool_choice": {
+ "$ref": "#/components/schemas/ToolChoice"
+ },
+ "tool_prompt_format": {
+ "$ref": "#/components/schemas/ToolPromptFormat"
+ },
"stream": {
"type": "boolean"
},
@@ -2381,20 +2287,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -2443,102 +2339,160 @@
],
"title": "streamed completion response."
},
- "AgenticSystemCreateRequest": {
+ "AgentConfig": {
"type": "object",
"properties": {
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/BraveSearchToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/WolframAlphaToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/PhotogenToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/CodeInterpreterToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/FunctionCallToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/MemoryToolDefinition"
+ }
+ ]
+ }
+ },
+ "tool_choice": {
+ "$ref": "#/components/schemas/ToolChoice"
+ },
+ "tool_prompt_format": {
+ "$ref": "#/components/schemas/ToolPromptFormat"
+ },
"model": {
"type": "string"
},
- "instance_config": {
- "$ref": "#/components/schemas/AgenticSystemInstanceConfig"
+ "instructions": {
+ "type": "string"
+ },
+ "memory_bank_configs": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "bank_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "vector"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bank_id",
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "bank_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "keyvalue"
+ },
+ "keys": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bank_id",
+ "type",
+ "keys"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "bank_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "keyword"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bank_id",
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "bank_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "graph"
+ },
+ "entities": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bank_id",
+ "type",
+ "entities"
+ ]
+ }
+ ]
+ }
}
},
"additionalProperties": false,
"required": [
"model",
- "instance_config"
- ]
- },
- "AgenticSystemInstanceConfig": {
- "type": "object",
- "properties": {
- "instructions": {
- "type": "string"
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "available_tools": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AgenticSystemToolDefinition"
- }
- },
- "input_shields": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ShieldDefinition"
- }
- },
- "output_shields": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ShieldDefinition"
- }
- },
- "debug_prefix_messages": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- },
- "tool_prompt_format": {
- "$ref": "#/components/schemas/ToolPromptFormat"
- }
- },
- "additionalProperties": false,
- "required": [
"instructions"
]
},
- "AgenticSystemToolDefinition": {
+ "BraveSearchToolDefinition": {
"type": "object",
"properties": {
- "tool_name": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/BuiltinTool"
- },
- {
- "type": "string"
- }
- ]
- },
- "description": {
- "type": "string"
- },
- "parameters": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/components/schemas/ToolParamDefinition"
- }
- },
- "execution_config": {
- "$ref": "#/components/schemas/RestAPIExecutionConfig"
- },
"input_shields": {
"type": "array",
"items": {
@@ -2550,11 +2504,18 @@
"items": {
"$ref": "#/components/schemas/ShieldDefinition"
}
+ },
+ "type": {
+ "type": "string",
+ "const": "brave_search"
+ },
+ "remote_execution": {
+ "$ref": "#/components/schemas/RestAPIExecutionConfig"
}
},
"additionalProperties": false,
"required": [
- "tool_name"
+ "type"
]
},
"BuiltinShield": {
@@ -2567,6 +2528,189 @@
"jailbreak_shield"
]
},
+ "CodeInterpreterToolDefinition": {
+ "type": "object",
+ "properties": {
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "code_interpreter"
+ },
+ "enable_inline_code_execution": {
+ "type": "boolean"
+ },
+ "remote_execution": {
+ "$ref": "#/components/schemas/RestAPIExecutionConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "enable_inline_code_execution"
+ ]
+ },
+ "FunctionCallToolDefinition": {
+ "type": "object",
+ "properties": {
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "function_call"
+ },
+ "description": {
+ "type": "string"
+ },
+ "parameters": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ToolParamDefinition"
+ }
+ },
+ "remote_execution": {
+ "$ref": "#/components/schemas/RestAPIExecutionConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "description",
+ "parameters"
+ ]
+ },
+ "MemoryBank": {
+ "type": "object",
+ "properties": {
+ "bank_id": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "config": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "vector"
+ },
+ "embedding_model": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "embedding_model"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "keyvalue"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "keyword"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "graph"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ }
+ ]
+ },
+ "url": {
+ "$ref": "#/components/schemas/URL"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bank_id",
+ "name",
+ "config"
+ ]
+ },
+ "MemoryToolDefinition": {
+ "type": "object",
+ "properties": {
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "memory"
+ },
+ "memory_banks": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MemoryBank"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "memory_banks"
+ ]
+ },
"OnViolationAction": {
"type": "integer",
"enum": [
@@ -2575,6 +2719,34 @@
2
]
},
+ "PhotogenToolDefinition": {
+ "type": "object",
+ "properties": {
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "photogen"
+ },
+ "remote_execution": {
+ "$ref": "#/components/schemas/RestAPIExecutionConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
"RestAPIExecutionConfig": {
"type": "object",
"properties": {
@@ -2653,41 +2825,49 @@
"on_violation_action"
]
},
- "ToolPromptFormat": {
+ "URL": {
"type": "string",
- "enum": [
- "json",
- "function_tag"
- ],
- "title": "This Enum refers to the prompt format for calling zero shot tools",
- "description": "`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n (parameters)\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`"
+ "format": "uri",
+ "pattern": "^(https?://|file://|data:)"
+ },
+ "WolframAlphaToolDefinition": {
+ "type": "object",
+ "properties": {
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "wolfram_alpha"
+ },
+ "remote_execution": {
+ "$ref": "#/components/schemas/RestAPIExecutionConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
},
"AgenticSystemCreateResponse": {
"type": "object",
"properties": {
- "system_id": {
+ "agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "system_id"
- ]
- },
- "AgenticSystemSessionCreateRequest": {
- "type": "object",
- "properties": {
- "system_id": {
- "type": "string"
- },
- "session_name": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "system_id",
- "session_name"
+ "agent_id"
]
},
"AgenticSystemSessionCreateResponse": {
@@ -2705,7 +2885,56 @@
"AgenticSystemTurnCreateRequest": {
"type": "object",
"properties": {
- "system_id": {
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ShieldDefinition"
+ }
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/BraveSearchToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/WolframAlphaToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/PhotogenToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/CodeInterpreterToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/FunctionCallToolDefinition"
+ },
+ {
+ "$ref": "#/components/schemas/MemoryToolDefinition"
+ }
+ ]
+ }
+ },
+ "tool_choice": {
+ "$ref": "#/components/schemas/ToolChoice"
+ },
+ "tool_prompt_format": {
+ "$ref": "#/components/schemas/ToolPromptFormat"
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "agent_id": {
"type": "string"
},
"session_id": {
@@ -2724,20 +2953,39 @@
]
}
},
+ "attachments": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Attachment"
+ }
+ },
"stream": {
"type": "boolean"
- },
- "override_config": {
- "$ref": "#/components/schemas/AgenticSystemInstanceConfig"
}
},
"additionalProperties": false,
"required": [
- "system_id",
+ "agent_id",
"session_id",
"messages"
]
},
+ "Attachment": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "$ref": "#/components/schemas/URL"
+ },
+ "mime_type": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "url",
+ "mime_type"
+ ]
+ },
"AgenticSystemTurnResponseStreamChunk": {
"description": "Server side event (SSE) stream of these events"
},
@@ -2915,51 +3163,76 @@
"failed"
]
},
- "MemoryBankDocument": {
+ "CreateMemoryBankRequest": {
"type": "object",
"properties": {
- "document_id": {
- "type": "string"
+ "config": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "vector"
+ },
+ "embedding_model": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "embedding_model"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "keyvalue"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "keyword"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "graph"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ }
+ ]
},
- "content": {
- "type": "string",
- "contentEncoding": "base64"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- },
- "mime_type": {
- "type": "string"
+ "url": {
+ "$ref": "#/components/schemas/URL"
}
},
"additionalProperties": false,
"required": [
- "document_id",
- "content",
- "metadata",
- "mime_type"
+ "config"
]
},
"CreateRunRequest": {
@@ -3211,6 +3484,65 @@
"model_response"
]
},
+ "MemoryBankDocument": {
+ "type": "object",
+ "properties": {
+ "document_id": {
+ "type": "string"
+ },
+ "content": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ {
+ "$ref": "#/components/schemas/URL"
+ }
+ ]
+ },
+ "mime_type": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "document_id",
+ "content",
+ "mime_type",
+ "metadata"
+ ]
+ },
"MemoryRetrievalStep": {
"type": "object",
"properties": {
@@ -3416,20 +3748,10 @@
{
"type": "string"
},
- {
- "$ref": "#/components/schemas/Attachment"
- },
{
"type": "array",
"items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "$ref": "#/components/schemas/Attachment"
- }
- ]
+ "type": "string"
}
}
]
@@ -3486,6 +3808,12 @@
"output_message": {
"$ref": "#/components/schemas/CompletionMessage"
},
+ "output_attachments": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Attachment"
+ }
+ },
"started_at": {
"type": "string",
"format": "date-time"
@@ -3502,6 +3830,7 @@
"input_messages",
"steps",
"output_message",
+ "output_attachments",
"started_at"
],
"title": "A single turn in an interaction with an Agentic System."
@@ -3720,22 +4049,6 @@
"additional_info"
]
},
- "MemoryBank": {
- "type": "object",
- "properties": {
- "memory_bank_id": {
- "type": "string"
- },
- "memory_bank_name": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "memory_bank_id",
- "memory_bank_name"
- ]
- },
"Metric": {
"type": "object",
"properties": {
@@ -4121,6 +4434,98 @@
"fsdp_cpu_offload"
]
},
+ "QueryDocumentsRequest": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "params": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "query"
+ ]
+ },
+ "QueryDocumentsResponse": {
+ "type": "object",
+ "properties": {
+ "chunks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "token_count": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "token_count"
+ ]
+ }
+ },
+ "scores": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "chunks",
+ "scores"
+ ]
+ },
"DialogGenerations": {
"type": "object",
"properties": {
@@ -4703,12 +5108,6 @@
}
],
"tags": [
- {
- "name": "RewardScoring"
- },
- {
- "name": "Datasets"
- },
{
"name": "Observability"
},
@@ -4716,24 +5115,26 @@
"name": "AgenticSystem"
},
{
- "name": "Inference"
+ "name": "PostTraining"
+ },
+ {
+ "name": "Memory"
},
{
"name": "Evaluations"
},
+ {
+ "name": "Datasets"
+ },
+ {
+ "name": "RewardScoring"
+ },
+ {
+ "name": "Inference"
+ },
{
"name": "SyntheticDataGeneration"
},
- {
- "name": "PostTraining"
- },
- {
- "name": "MemoryBanks"
- },
- {
- "name": "Attachment",
- "description": ""
- },
{
"name": "BatchChatCompletionRequest",
"description": ""
@@ -4766,6 +5167,10 @@
"name": "ToolCall",
"description": ""
},
+ {
+ "name": "ToolChoice",
+ "description": ""
+ },
{
"name": "ToolDefinition",
"description": ""
@@ -4775,12 +5180,12 @@
"description": ""
},
{
- "name": "ToolResponseMessage",
- "description": ""
+ "name": "ToolPromptFormat",
+ "description": "This Enum refers to the prompt format for calling zero shot tools\n\n`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`\n\n"
},
{
- "name": "URL",
- "description": ""
+ "name": "ToolResponseMessage",
+ "description": ""
},
{
"name": "UserMessage",
@@ -4835,25 +5240,41 @@
"description": "streamed completion response.\n\n"
},
{
- "name": "AgenticSystemCreateRequest",
- "description": ""
+ "name": "AgentConfig",
+ "description": ""
},
{
- "name": "AgenticSystemInstanceConfig",
- "description": ""
- },
- {
- "name": "AgenticSystemToolDefinition",
- "description": ""
+ "name": "BraveSearchToolDefinition",
+ "description": ""
},
{
"name": "BuiltinShield",
"description": ""
},
+ {
+ "name": "CodeInterpreterToolDefinition",
+ "description": ""
+ },
+ {
+ "name": "FunctionCallToolDefinition",
+ "description": ""
+ },
+ {
+ "name": "MemoryBank",
+ "description": ""
+ },
+ {
+ "name": "MemoryToolDefinition",
+ "description": ""
+ },
{
"name": "OnViolationAction",
"description": ""
},
+ {
+ "name": "PhotogenToolDefinition",
+ "description": ""
+ },
{
"name": "RestAPIExecutionConfig",
"description": ""
@@ -4867,17 +5288,17 @@
"description": ""
},
{
- "name": "ToolPromptFormat",
- "description": "This Enum refers to the prompt format for calling zero shot tools\n\n`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n (parameters)\n\nThe detailed prompts for each of these formats are defined in `system_prompt.py`\n\n"
+ "name": "URL",
+ "description": ""
+ },
+ {
+ "name": "WolframAlphaToolDefinition",
+ "description": ""
},
{
"name": "AgenticSystemCreateResponse",
"description": ""
},
- {
- "name": "AgenticSystemSessionCreateRequest",
- "description": ""
- },
{
"name": "AgenticSystemSessionCreateResponse",
"description": ""
@@ -4886,6 +5307,10 @@
"name": "AgenticSystemTurnCreateRequest",
"description": ""
},
+ {
+ "name": "Attachment",
+ "description": ""
+ },
{
"name": "AgenticSystemTurnResponseStreamChunk",
"description": "Server side event (SSE) stream of these events\n\n"
@@ -4915,8 +5340,8 @@
"description": ""
},
{
- "name": "MemoryBankDocument",
- "description": ""
+ "name": "CreateMemoryBankRequest",
+ "description": ""
},
{
"name": "CreateRunRequest",
@@ -4950,6 +5375,10 @@
"name": "InferenceStep",
"description": ""
},
+ {
+ "name": "MemoryBankDocument",
+ "description": ""
+ },
{
"name": "MemoryRetrievalStep",
"description": ""
@@ -5010,10 +5439,6 @@
"name": "Log",
"description": ""
},
- {
- "name": "MemoryBank",
- "description": ""
- },
{
"name": "Metric",
"description": ""
@@ -5066,6 +5491,14 @@
"name": "TrainingConfig",
"description": ""
},
+ {
+ "name": "QueryDocumentsRequest",
+ "description": ""
+ },
+ {
+ "name": "QueryDocumentsResponse",
+ "description": ""
+ },
{
"name": "DialogGenerations",
"description": ""
@@ -5135,7 +5568,7 @@
"Datasets",
"Evaluations",
"Inference",
- "MemoryBanks",
+ "Memory",
"Observability",
"PostTraining",
"RewardScoring",
@@ -5145,13 +5578,10 @@
{
"name": "Types",
"tags": [
- "AgenticSystemCreateRequest",
+ "AgentConfig",
"AgenticSystemCreateResponse",
- "AgenticSystemInstanceConfig",
- "AgenticSystemSessionCreateRequest",
"AgenticSystemSessionCreateResponse",
"AgenticSystemStepResponse",
- "AgenticSystemToolDefinition",
"AgenticSystemTurnCreateRequest",
"AgenticSystemTurnResponseStreamChunk",
"Artifact",
@@ -5161,6 +5591,7 @@
"BatchChatCompletionResponse",
"BatchCompletionRequest",
"BatchCompletionResponse",
+ "BraveSearchToolDefinition",
"BuiltinShield",
"BuiltinTool",
"ChatCompletionRequest",
@@ -5168,11 +5599,13 @@
"ChatCompletionResponseEventType",
"ChatCompletionResponseStreamChunk",
"Checkpoint",
+ "CodeInterpreterToolDefinition",
"CompletionMessage",
"CompletionRequest",
"CompletionResponseStreamChunk",
"CreateDatasetRequest",
"CreateExperimentRequest",
+ "CreateMemoryBankRequest",
"CreateRunRequest",
"DPOAlignmentConfig",
"DialogGenerations",
@@ -5187,6 +5620,7 @@
"Experiment",
"ExperimentStatus",
"FinetuningAlgorithm",
+ "FunctionCallToolDefinition",
"InferenceStep",
"Log",
"LogMessagesRequest",
@@ -5196,9 +5630,11 @@
"MemoryBank",
"MemoryBankDocument",
"MemoryRetrievalStep",
+ "MemoryToolDefinition",
"Metric",
"OnViolationAction",
"OptimizerConfig",
+ "PhotogenToolDefinition",
"PostTrainingJob",
"PostTrainingJobArtifactsResponse",
"PostTrainingJobLogStream",
@@ -5207,6 +5643,8 @@
"PostTrainingRLHFRequest",
"PostTrainingSFTRequest",
"QLoraFinetuningConfig",
+ "QueryDocumentsRequest",
+ "QueryDocumentsResponse",
"RLHFAlgorithm",
"RestAPIExecutionConfig",
"RestAPIMethod",
@@ -5229,6 +5667,7 @@
"ToolCall",
"ToolCallDelta",
"ToolCallParseStatus",
+ "ToolChoice",
"ToolDefinition",
"ToolExecutionStep",
"ToolParamDefinition",
@@ -5243,7 +5682,8 @@
"UpdateExperimentRequest",
"UpdateRunRequest",
"UploadArtifactRequest",
- "UserMessage"
+ "UserMessage",
+ "WolframAlphaToolDefinition"
]
}
]
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
index 7cfb22669..019790a62 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@@ -1,67 +1,106 @@
components:
responses: {}
schemas:
- AgenticSystemCreateRequest:
+ AgentConfig:
additionalProperties: false
properties:
- instance_config:
- $ref: '#/components/schemas/AgenticSystemInstanceConfig'
- model:
- type: string
- required:
- - model
- - instance_config
- type: object
- AgenticSystemCreateResponse:
- additionalProperties: false
- properties:
- system_id:
- type: string
- required:
- - system_id
- type: object
- AgenticSystemInstanceConfig:
- additionalProperties: false
- properties:
- available_tools:
- items:
- $ref: '#/components/schemas/AgenticSystemToolDefinition'
- type: array
- debug_prefix_messages:
- items:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- type: array
input_shields:
items:
$ref: '#/components/schemas/ShieldDefinition'
type: array
instructions:
type: string
+ memory_bank_configs:
+ items:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ bank_id:
+ type: string
+ type:
+ const: vector
+ type: string
+ required:
+ - bank_id
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ bank_id:
+ type: string
+ keys:
+ items:
+ type: string
+ type: array
+ type:
+ const: keyvalue
+ type: string
+ required:
+ - bank_id
+ - type
+ - keys
+ type: object
+ - additionalProperties: false
+ properties:
+ bank_id:
+ type: string
+ type:
+ const: keyword
+ type: string
+ required:
+ - bank_id
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ bank_id:
+ type: string
+ entities:
+ items:
+ type: string
+ type: array
+ type:
+ const: graph
+ type: string
+ required:
+ - bank_id
+ - type
+ - entities
+ type: object
+ type: array
+ model:
+ type: string
output_shields:
items:
$ref: '#/components/schemas/ShieldDefinition'
type: array
sampling_params:
$ref: '#/components/schemas/SamplingParams'
+ tool_choice:
+ $ref: '#/components/schemas/ToolChoice'
tool_prompt_format:
$ref: '#/components/schemas/ToolPromptFormat'
+ tools:
+ items:
+ oneOf:
+ - $ref: '#/components/schemas/BraveSearchToolDefinition'
+ - $ref: '#/components/schemas/WolframAlphaToolDefinition'
+ - $ref: '#/components/schemas/PhotogenToolDefinition'
+ - $ref: '#/components/schemas/CodeInterpreterToolDefinition'
+ - $ref: '#/components/schemas/FunctionCallToolDefinition'
+ - $ref: '#/components/schemas/MemoryToolDefinition'
+ type: array
required:
+ - model
- instructions
type: object
- AgenticSystemSessionCreateRequest:
+ AgenticSystemCreateResponse:
additionalProperties: false
properties:
- session_name:
- type: string
- system_id:
+ agent_id:
type: string
required:
- - system_id
- - session_name
+ - agent_id
type: object
AgenticSystemSessionCreateResponse:
additionalProperties: false
@@ -83,51 +122,53 @@ components:
required:
- step
type: object
- AgenticSystemToolDefinition:
+ AgenticSystemTurnCreateRequest:
additionalProperties: false
properties:
- description:
+ agent_id:
type: string
- execution_config:
- $ref: '#/components/schemas/RestAPIExecutionConfig'
+ attachments:
+ items:
+ $ref: '#/components/schemas/Attachment'
+ type: array
input_shields:
items:
$ref: '#/components/schemas/ShieldDefinition'
type: array
- output_shields:
- items:
- $ref: '#/components/schemas/ShieldDefinition'
- type: array
- parameters:
- additionalProperties:
- $ref: '#/components/schemas/ToolParamDefinition'
- type: object
- tool_name:
- oneOf:
- - $ref: '#/components/schemas/BuiltinTool'
- - type: string
- required:
- - tool_name
- type: object
- AgenticSystemTurnCreateRequest:
- additionalProperties: false
- properties:
+ instructions:
+ type: string
messages:
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
type: array
- override_config:
- $ref: '#/components/schemas/AgenticSystemInstanceConfig'
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
session_id:
type: string
stream:
type: boolean
- system_id:
- type: string
+ tool_choice:
+ $ref: '#/components/schemas/ToolChoice'
+ tool_prompt_format:
+ $ref: '#/components/schemas/ToolPromptFormat'
+ tools:
+ items:
+ oneOf:
+ - $ref: '#/components/schemas/BraveSearchToolDefinition'
+ - $ref: '#/components/schemas/WolframAlphaToolDefinition'
+ - $ref: '#/components/schemas/PhotogenToolDefinition'
+ - $ref: '#/components/schemas/CodeInterpreterToolDefinition'
+ - $ref: '#/components/schemas/FunctionCallToolDefinition'
+ - $ref: '#/components/schemas/MemoryToolDefinition'
+ type: array
required:
- - system_id
+ - agent_id
- session_id
- messages
type: object
@@ -190,10 +231,6 @@ components:
BatchChatCompletionRequest:
additionalProperties: false
properties:
- available_tools:
- items:
- $ref: '#/components/schemas/ToolDefinition'
- type: array
logprobs:
additionalProperties: false
properties:
@@ -214,6 +251,14 @@ components:
type: string
sampling_params:
$ref: '#/components/schemas/SamplingParams'
+ tool_choice:
+ $ref: '#/components/schemas/ToolChoice'
+ tool_prompt_format:
+ $ref: '#/components/schemas/ToolPromptFormat'
+ tools:
+ items:
+ $ref: '#/components/schemas/ToolDefinition'
+ type: array
required:
- model
- messages_batch
@@ -235,11 +280,8 @@ components:
items:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
type: array
logprobs:
@@ -266,6 +308,25 @@ components:
required:
- completion_message_batch
type: object
+ BraveSearchToolDefinition:
+ additionalProperties: false
+ properties:
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ remote_execution:
+ $ref: '#/components/schemas/RestAPIExecutionConfig'
+ type:
+ const: brave_search
+ type: string
+ required:
+ - type
+ type: object
BuiltinShield:
enum:
- llama_guard
@@ -284,10 +345,6 @@ components:
ChatCompletionRequest:
additionalProperties: false
properties:
- available_tools:
- items:
- $ref: '#/components/schemas/ToolDefinition'
- type: array
logprobs:
additionalProperties: false
properties:
@@ -308,6 +365,14 @@ components:
$ref: '#/components/schemas/SamplingParams'
stream:
type: boolean
+ tool_choice:
+ $ref: '#/components/schemas/ToolChoice'
+ tool_prompt_format:
+ $ref: '#/components/schemas/ToolPromptFormat'
+ tools:
+ items:
+ $ref: '#/components/schemas/ToolDefinition'
+ type: array
required:
- model
- messages
@@ -349,17 +414,36 @@ components:
type: object
Checkpoint:
description: Checkpoint created during training runs
+ CodeInterpreterToolDefinition:
+ additionalProperties: false
+ properties:
+ enable_inline_code_execution:
+ type: boolean
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ remote_execution:
+ $ref: '#/components/schemas/RestAPIExecutionConfig'
+ type:
+ const: code_interpreter
+ type: string
+ required:
+ - type
+ - enable_inline_code_execution
+ type: object
CompletionMessage:
additionalProperties: false
properties:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
role:
const: assistant
@@ -382,11 +466,8 @@ components:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
logprobs:
additionalProperties: false
@@ -449,6 +530,51 @@ components:
required:
- name
type: object
+ CreateMemoryBankRequest:
+ additionalProperties: false
+ properties:
+ config:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ embedding_model:
+ type: string
+ type:
+ const: vector
+ type: string
+ required:
+ - type
+ - embedding_model
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: keyvalue
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: keyword
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: graph
+ type: string
+ required:
+ - type
+ type: object
+ url:
+ $ref: '#/components/schemas/URL'
+ required:
+ - config
+ type: object
CreateRunRequest:
additionalProperties: false
properties:
@@ -688,6 +814,33 @@ components:
- qlora
- dora
type: string
+ FunctionCallToolDefinition:
+ additionalProperties: false
+ properties:
+ description:
+ type: string
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ parameters:
+ additionalProperties:
+ $ref: '#/components/schemas/ToolParamDefinition'
+ type: object
+ remote_execution:
+ $ref: '#/components/schemas/RestAPIExecutionConfig'
+ type:
+ const: function_call
+ type: string
+ required:
+ - type
+ - description
+ - parameters
+ type: object
InferenceStep:
additionalProperties: false
properties:
@@ -806,20 +959,64 @@ components:
MemoryBank:
additionalProperties: false
properties:
- memory_bank_id:
+ bank_id:
type: string
- memory_bank_name:
+ config:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ embedding_model:
+ type: string
+ type:
+ const: vector
+ type: string
+ required:
+ - type
+ - embedding_model
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: keyvalue
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: keyword
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: graph
+ type: string
+ required:
+ - type
+ type: object
+ name:
type: string
+ url:
+ $ref: '#/components/schemas/URL'
required:
- - memory_bank_id
- - memory_bank_name
+ - bank_id
+ - name
+ - config
type: object
MemoryBankDocument:
additionalProperties: false
properties:
content:
- contentEncoding: base64
- type: string
+ oneOf:
+ - type: string
+ - items:
+ type: string
+ type: array
+ - $ref: '#/components/schemas/URL'
document_id:
type: string
metadata:
@@ -837,8 +1034,8 @@ components:
required:
- document_id
- content
- - metadata
- mime_type
+ - metadata
type: object
MemoryRetrievalStep:
additionalProperties: false
@@ -876,6 +1073,28 @@ components:
- documents
- scores
type: object
+ MemoryToolDefinition:
+ additionalProperties: false
+ properties:
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ memory_banks:
+ items:
+ $ref: '#/components/schemas/MemoryBank'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ type:
+ const: memory
+ type: string
+ required:
+ - type
+ - memory_banks
+ type: object
Metric:
additionalProperties: false
properties:
@@ -925,6 +1144,25 @@ components:
- lr_min
- weight_decay
type: object
+ PhotogenToolDefinition:
+ additionalProperties: false
+ properties:
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ remote_execution:
+ $ref: '#/components/schemas/RestAPIExecutionConfig'
+ type:
+ const: photogen
+ type: string
+ required:
+ - type
+ type: object
PostTrainingJob:
additionalProperties: false
properties:
@@ -1133,6 +1371,56 @@ components:
- rank
- alpha
type: object
+ QueryDocumentsRequest:
+ additionalProperties: false
+ properties:
+ params:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ query:
+ oneOf:
+ - type: string
+ - items:
+ type: string
+ type: array
+ required:
+ - query
+ type: object
+ QueryDocumentsResponse:
+ additionalProperties: false
+ properties:
+ chunks:
+ items:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - items:
+ type: string
+ type: array
+ token_count:
+ type: integer
+ required:
+ - content
+ - token_count
+ type: object
+ type: array
+ scores:
+ items:
+ type: number
+ type: array
+ required:
+ - chunks
+ - scores
+ type: object
RLHFAlgorithm:
enum:
- dpo
@@ -1430,11 +1718,8 @@ components:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
role:
const: system
@@ -1512,6 +1797,11 @@ components:
- failure
- success
type: string
+ ToolChoice:
+ enum:
+ - auto
+ - required
+ type: string
ToolDefinition:
additionalProperties: false
properties:
@@ -1593,11 +1883,8 @@ components:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
tool_name:
oneOf:
@@ -1616,11 +1903,8 @@ components:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
role:
const: ipython
@@ -1705,6 +1989,10 @@ components:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
type: array
+ output_attachments:
+ items:
+ $ref: '#/components/schemas/Attachment'
+ type: array
output_message:
$ref: '#/components/schemas/CompletionMessage'
session_id:
@@ -1728,6 +2016,7 @@ components:
- input_messages
- steps
- output_message
+ - output_attachments
- started_at
title: A single turn in an interaction with an Agentic System.
type: object
@@ -1812,11 +2101,8 @@ components:
content:
oneOf:
- type: string
- - $ref: '#/components/schemas/Attachment'
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
+ type: string
type: array
role:
const: user
@@ -1825,11 +2111,30 @@ components:
- role
- content
type: object
+ WolframAlphaToolDefinition:
+ additionalProperties: false
+ properties:
+ input_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ output_shields:
+ items:
+ $ref: '#/components/schemas/ShieldDefinition'
+ type: array
+ remote_execution:
+ $ref: '#/components/schemas/RestAPIExecutionConfig'
+ type:
+ const: wolfram_alpha
+ type: string
+ required:
+ - type
+ type: object
info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-08-21 14:16:38.313950"
+ \ draft and subject to change.\n Generated at 2024-08-23 06:36:10.417114"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -1842,7 +2147,7 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/AgenticSystemCreateRequest'
+ $ref: '#/components/schemas/AgentConfig'
required: true
responses:
'200':
@@ -1866,67 +2171,19 @@ paths:
description: OK
tags:
- AgenticSystem
- /agentic_system/memory_bank/attach:
- post:
- parameters:
- - in: query
- name: agent_id
- required: true
- schema:
- type: string
- - in: query
- name: session_id
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- type: string
- type: array
- required: true
- responses:
- '200':
- description: OK
- tags:
- - AgenticSystem
- /agentic_system/memory_bank/detach:
- post:
- parameters:
- - in: query
- name: agent_id
- required: true
- schema:
- type: string
- - in: query
- name: session_id
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- type: string
- type: array
- required: true
- responses:
- '200':
- description: OK
- tags:
- - AgenticSystem
/agentic_system/session/create:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/AgenticSystemSessionCreateRequest'
+ get:
+ parameters:
+ - in: query
+ name: agent_id
required: true
+ schema:
+ type: string
+ - in: query
+ name: session_name
+ required: true
+ schema:
+ type: string
responses:
'200':
content:
@@ -2466,7 +2723,7 @@ paths:
description: OK
tags:
- Observability
- /memory_bank/delete:
+ /memory_bank/documents/delete:
post:
parameters:
- in: query
@@ -2484,14 +2741,10 @@ paths:
required: true
responses:
'200':
- content:
- application/jsonl:
- schema:
- type: string
description: OK
tags:
- - MemoryBanks
- /memory_bank/get:
+ - Memory
+ /memory_bank/documents/get:
post:
parameters:
- in: query
@@ -2515,7 +2768,7 @@ paths:
$ref: '#/components/schemas/MemoryBankDocument'
description: OK
tags:
- - MemoryBanks
+ - Memory
/memory_bank/insert:
post:
parameters:
@@ -2536,7 +2789,30 @@ paths:
'200':
description: OK
tags:
- - MemoryBanks
+ - Memory
+ /memory_bank/query:
+ post:
+ parameters:
+ - in: query
+ name: bank_id
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryDocumentsRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryDocumentsResponse'
+ description: OK
+ tags:
+ - Memory
/memory_bank/update:
post:
parameters:
@@ -2557,17 +2833,12 @@ paths:
'200':
description: OK
tags:
- - MemoryBanks
+ - Memory
/memory_banks/create:
post:
parameters:
- in: query
- name: bank_id
- required: true
- schema:
- type: string
- - in: query
- name: bank_name
+ name: name
required: true
schema:
type: string
@@ -2575,15 +2846,17 @@ paths:
content:
application/json:
schema:
- items:
- $ref: '#/components/schemas/MemoryBankDocument'
- type: array
+ $ref: '#/components/schemas/CreateMemoryBankRequest'
required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/MemoryBank'
description: OK
tags:
- - MemoryBanks
+ - Memory
/memory_banks/drop:
delete:
parameters:
@@ -2600,7 +2873,7 @@ paths:
type: string
description: OK
tags:
- - MemoryBanks
+ - Memory
/memory_banks/get:
get:
parameters:
@@ -2612,12 +2885,12 @@ paths:
responses:
'200':
content:
- application/jsonl:
+ application/json:
schema:
$ref: '#/components/schemas/MemoryBank'
description: OK
tags:
- - MemoryBanks
+ - Memory
/memory_banks/list:
get:
parameters: []
@@ -2629,7 +2902,7 @@ paths:
$ref: '#/components/schemas/MemoryBank'
description: OK
tags:
- - MemoryBanks
+ - Memory
/post_training/job/artifacts:
get:
parameters:
@@ -2832,17 +3105,15 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: RewardScoring
-- name: Datasets
- name: Observability
- name: AgenticSystem
-- name: Inference
-- name: Evaluations
-- name: SyntheticDataGeneration
- name: PostTraining
-- name: MemoryBanks
-- description:
- name: Attachment
+- name: Memory
+- name: Evaluations
+- name: Datasets
+- name: RewardScoring
+- name: Inference
+- name: SyntheticDataGeneration
- description:
name: BatchChatCompletionRequest
@@ -2862,16 +3133,27 @@ tags:
name: SystemMessage
- description:
name: ToolCall
+- description:
+ name: ToolChoice
- description:
name: ToolDefinition
- description:
name: ToolParamDefinition
+- description: "This Enum refers to the prompt format for calling zero shot tools\n\
+ \n`json` --\n Refers to the json format for calling tools.\n The json format\
+ \ takes the form like\n {\n \"type\": \"function\",\n \"function\"\
+ \ : {\n \"name\": \"function_name\",\n \"description\":\
+ \ \"function_description\",\n \"parameters\": {...}\n }\n \
+ \ }\n\n`function_tag` --\n This is an example of how you could define\n \
+ \ your own user defined format for making tool calls.\n The function_tag format\
+ \ looks like this,\n (parameters)\n\nThe\
+ \ detailed prompts for each of these formats are defined in `system_prompt.py`\n\
+ \n"
+ name: ToolPromptFormat
- description:
name: ToolResponseMessage
-- description:
- name: URL
- description:
name: UserMessage
- description: '
name: CompletionResponseStreamChunk
-- description:
+ name: AgentConfig
+- description:
- name: AgenticSystemCreateRequest
-- description:
- name: AgenticSystemInstanceConfig
-- description:
- name: AgenticSystemToolDefinition
+ name: BraveSearchToolDefinition
- description:
name: BuiltinShield
+- description:
+ name: CodeInterpreterToolDefinition
+- description:
+ name: FunctionCallToolDefinition
+- description:
+ name: MemoryBank
+- description:
+ name: MemoryToolDefinition
- description:
name: OnViolationAction
+- description:
+ name: PhotogenToolDefinition
- description:
name: RestAPIExecutionConfig
@@ -2939,29 +3231,22 @@ tags:
- description:
name: ShieldDefinition
-- description: "This Enum refers to the prompt format for calling zero shot tools\n\
- \n`json` --\n Refers to the json format for calling tools.\n The json format\
- \ takes the form like\n {\n \"type\": \"function\",\n \"function\"\
- \ : {\n \"name\": \"function_name\",\n \"description\":\
- \ \"function_description\",\n \"parameters\": {...}\n }\n \
- \ }\n\n`function_tag` --\n This is an example of how you could define\n \
- \ your own user defined format for making tool calls.\n The function_tag format\
- \ looks like this,\n (parameters)\n\nThe\
- \ detailed prompts for each of these formats are defined in `system_prompt.py`\n\
- \n"
- name: ToolPromptFormat
+- description:
+ name: URL
+- description:
+ name: WolframAlphaToolDefinition
- description:
name: AgenticSystemCreateResponse
-- description:
- name: AgenticSystemSessionCreateRequest
- description:
name: AgenticSystemSessionCreateResponse
- description:
name: AgenticSystemTurnCreateRequest
+- description:
+ name: Attachment
- description: 'Server side event (SSE) stream of these events
@@ -2989,9 +3274,9 @@ tags:
- description:
name: ExperimentStatus
-- description:
- name: MemoryBankDocument
+ name: CreateMemoryBankRequest
- description:
name: CreateRunRequest
@@ -3024,6 +3309,9 @@ tags:
name: EvaluateTextGenerationRequest
- description:
name: InferenceStep
+- description:
+ name: MemoryBankDocument
- description:
name: MemoryRetrievalStep
@@ -3070,8 +3358,6 @@ tags:
name: LogSearchRequest
- description:
name: Log
-- description:
- name: MemoryBank
- description:
name: Metric
- description: 'Artifacts of a finetuning job.
@@ -3118,6 +3404,12 @@ tags:
name: RLHFAlgorithm
- description:
name: TrainingConfig
+- description:
+ name: QueryDocumentsRequest
+- description:
+ name: QueryDocumentsResponse
- description:
name: DialogGenerations
@@ -3185,20 +3477,17 @@ x-tagGroups:
- Datasets
- Evaluations
- Inference
- - MemoryBanks
+ - Memory
- Observability
- PostTraining
- RewardScoring
- SyntheticDataGeneration
- name: Types
tags:
- - AgenticSystemCreateRequest
+ - AgentConfig
- AgenticSystemCreateResponse
- - AgenticSystemInstanceConfig
- - AgenticSystemSessionCreateRequest
- AgenticSystemSessionCreateResponse
- AgenticSystemStepResponse
- - AgenticSystemToolDefinition
- AgenticSystemTurnCreateRequest
- AgenticSystemTurnResponseStreamChunk
- Artifact
@@ -3208,6 +3497,7 @@ x-tagGroups:
- BatchChatCompletionResponse
- BatchCompletionRequest
- BatchCompletionResponse
+ - BraveSearchToolDefinition
- BuiltinShield
- BuiltinTool
- ChatCompletionRequest
@@ -3215,11 +3505,13 @@ x-tagGroups:
- ChatCompletionResponseEventType
- ChatCompletionResponseStreamChunk
- Checkpoint
+ - CodeInterpreterToolDefinition
- CompletionMessage
- CompletionRequest
- CompletionResponseStreamChunk
- CreateDatasetRequest
- CreateExperimentRequest
+ - CreateMemoryBankRequest
- CreateRunRequest
- DPOAlignmentConfig
- DialogGenerations
@@ -3234,6 +3526,7 @@ x-tagGroups:
- Experiment
- ExperimentStatus
- FinetuningAlgorithm
+ - FunctionCallToolDefinition
- InferenceStep
- Log
- LogMessagesRequest
@@ -3243,9 +3536,11 @@ x-tagGroups:
- MemoryBank
- MemoryBankDocument
- MemoryRetrievalStep
+ - MemoryToolDefinition
- Metric
- OnViolationAction
- OptimizerConfig
+ - PhotogenToolDefinition
- PostTrainingJob
- PostTrainingJobArtifactsResponse
- PostTrainingJobLogStream
@@ -3254,6 +3549,8 @@ x-tagGroups:
- PostTrainingRLHFRequest
- PostTrainingSFTRequest
- QLoraFinetuningConfig
+ - QueryDocumentsRequest
+ - QueryDocumentsResponse
- RLHFAlgorithm
- RestAPIExecutionConfig
- RestAPIMethod
@@ -3276,6 +3573,7 @@ x-tagGroups:
- ToolCall
- ToolCallDelta
- ToolCallParseStatus
+ - ToolChoice
- ToolDefinition
- ToolExecutionStep
- ToolParamDefinition
@@ -3291,3 +3589,4 @@ x-tagGroups:
- UpdateRunRequest
- UploadArtifactRequest
- UserMessage
+ - WolframAlphaToolDefinition