mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
re-work tool definitions, fix FastAPI issues, fix tool regressions
This commit is contained in:
parent
8d14d4228b
commit
8efe614719
11 changed files with 144 additions and 104 deletions
|
@ -77,6 +77,7 @@ class FunctionCallToolDefinition(ToolDefinitionCommon):
|
||||||
type: Literal[AgenticSystemTool.function_call.value] = (
|
type: Literal[AgenticSystemTool.function_call.value] = (
|
||||||
AgenticSystemTool.function_call.value
|
AgenticSystemTool.function_call.value
|
||||||
)
|
)
|
||||||
|
function_name: str
|
||||||
description: str
|
description: str
|
||||||
parameters: Dict[str, ToolParamDefinition]
|
parameters: Dict[str, ToolParamDefinition]
|
||||||
remote_execution: Optional[RestAPIExecutionConfig] = None
|
remote_execution: Optional[RestAPIExecutionConfig] = None
|
||||||
|
|
|
@ -6,49 +6,42 @@
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import (
|
from pydantic import BaseModel
|
||||||
BuiltinTool,
|
|
||||||
SamplingParams,
|
|
||||||
ToolParamDefinition,
|
|
||||||
ToolPromptFormat,
|
|
||||||
UserMessage,
|
|
||||||
)
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.agentic_system.event_logger import EventLogger
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from .api import (
|
from .api import * # noqa: F403
|
||||||
AgentConfig,
|
|
||||||
AgenticSystem,
|
from .event_logger import EventLogger
|
||||||
AgenticSystemCreateResponse,
|
|
||||||
AgenticSystemSessionCreateResponse,
|
|
||||||
AgenticSystemToolDefinition,
|
|
||||||
AgenticSystemTurnCreateRequest,
|
|
||||||
AgenticSystemTurnResponseStreamChunk,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def get_client_impl(base_url: str):
|
async def get_client_impl(base_url: str):
|
||||||
return AgenticSystemClient(base_url)
|
return AgenticSystemClient(base_url)
|
||||||
|
|
||||||
|
|
||||||
|
def encodable_dict(d: BaseModel):
|
||||||
|
return json.loads(d.json())
|
||||||
|
|
||||||
|
|
||||||
class AgenticSystemClient(AgenticSystem):
|
class AgenticSystemClient(AgenticSystem):
|
||||||
def __init__(self, base_url: str):
|
def __init__(self, base_url: str):
|
||||||
self.base_url = base_url
|
self.base_url = base_url
|
||||||
|
|
||||||
async def create_agentic_system(
|
async def create_agentic_system(
|
||||||
self, request: AgenticSystemCreateRequest
|
self, agent_config: AgentConfig
|
||||||
) -> AgenticSystemCreateResponse:
|
) -> AgenticSystemCreateResponse:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{self.base_url}/agentic_system/create",
|
f"{self.base_url}/agentic_system/create",
|
||||||
data=request.json(),
|
json={
|
||||||
|
"agent_config": encodable_dict(agent_config),
|
||||||
|
},
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
@ -56,12 +49,16 @@ class AgenticSystemClient(AgenticSystem):
|
||||||
|
|
||||||
async def create_agentic_system_session(
|
async def create_agentic_system_session(
|
||||||
self,
|
self,
|
||||||
request: AgenticSystemSessionCreateRequest,
|
agent_id: str,
|
||||||
|
session_name: str,
|
||||||
) -> AgenticSystemSessionCreateResponse:
|
) -> AgenticSystemSessionCreateResponse:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{self.base_url}/agentic_system/session/create",
|
f"{self.base_url}/agentic_system/session/create",
|
||||||
data=request.json(),
|
json={
|
||||||
|
"agent_id": agent_id,
|
||||||
|
"session_name": session_name,
|
||||||
|
},
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
@ -75,7 +72,9 @@ class AgenticSystemClient(AgenticSystem):
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{self.base_url}/agentic_system/turn/create",
|
f"{self.base_url}/agentic_system/turn/create",
|
||||||
data=request.json(),
|
json={
|
||||||
|
"request": encodable_dict(request),
|
||||||
|
},
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
timeout=20,
|
timeout=20,
|
||||||
) as response:
|
) as response:
|
||||||
|
@ -96,19 +95,13 @@ async def run_main(host: str, port: int):
|
||||||
api = AgenticSystemClient(f"http://{host}:{port}")
|
api = AgenticSystemClient(f"http://{host}:{port}")
|
||||||
|
|
||||||
tool_definitions = [
|
tool_definitions = [
|
||||||
AgenticSystemToolDefinition(
|
BraveSearchToolDefinition(),
|
||||||
tool_name=BuiltinTool.brave_search,
|
WolframAlphaToolDefinition(),
|
||||||
),
|
CodeInterpreterToolDefinition(),
|
||||||
AgenticSystemToolDefinition(
|
|
||||||
tool_name=BuiltinTool.wolfram_alpha,
|
|
||||||
),
|
|
||||||
AgenticSystemToolDefinition(
|
|
||||||
tool_name=BuiltinTool.code_interpreter,
|
|
||||||
),
|
|
||||||
]
|
]
|
||||||
tool_definitions += [
|
tool_definitions += [
|
||||||
AgenticSystemToolDefinition(
|
FunctionCallToolDefinition(
|
||||||
tool_name="get_boiling_point",
|
function_name="get_boiling_point",
|
||||||
description="Get the boiling point of a imaginary liquids (eg. polyjuice)",
|
description="Get the boiling point of a imaginary liquids (eg. polyjuice)",
|
||||||
parameters={
|
parameters={
|
||||||
"liquid_name": ToolParamDefinition(
|
"liquid_name": ToolParamDefinition(
|
||||||
|
@ -128,12 +121,10 @@ async def run_main(host: str, port: int):
|
||||||
agent_config = AgentConfig(
|
agent_config = AgentConfig(
|
||||||
model="Meta-Llama3.1-8B-Instruct",
|
model="Meta-Llama3.1-8B-Instruct",
|
||||||
instructions="You are a helpful assistant",
|
instructions="You are a helpful assistant",
|
||||||
sampling_params=SamplingParams(),
|
sampling_params=SamplingParams(temperature=1.0, top_p=0.9),
|
||||||
tools=tool_definitions,
|
tools=tool_definitions,
|
||||||
input_shields=[],
|
tool_choice=ToolChoice.auto,
|
||||||
output_shields=[],
|
tool_prompt_format=ToolPromptFormat.function_tag,
|
||||||
debug_prefix_messages=[],
|
|
||||||
tool_prompt_format=ToolPromptFormat.json,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
create_response = await api.create_agentic_system(agent_config)
|
create_response = await api.create_agentic_system(agent_config)
|
||||||
|
|
|
@ -10,8 +10,6 @@ import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import AsyncGenerator, List
|
from typing import AsyncGenerator, List
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import ToolPromptFormat
|
|
||||||
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.agentic_system.api import * # noqa: F403
|
from llama_toolchain.agentic_system.api import * # noqa: F403
|
||||||
|
@ -20,7 +18,10 @@ from llama_toolchain.memory.api import * # noqa: F403
|
||||||
from llama_toolchain.safety.api import * # noqa: F403
|
from llama_toolchain.safety.api import * # noqa: F403
|
||||||
|
|
||||||
from llama_toolchain.tools.base import BaseTool
|
from llama_toolchain.tools.base import BaseTool
|
||||||
from llama_toolchain.tools.builtin import SingleMessageBuiltinTool
|
from llama_toolchain.tools.builtin import (
|
||||||
|
interpret_content_as_attachment,
|
||||||
|
SingleMessageBuiltinTool,
|
||||||
|
)
|
||||||
|
|
||||||
from .safety import SafetyException, ShieldRunnerMixin
|
from .safety import SafetyException, ShieldRunnerMixin
|
||||||
|
|
||||||
|
@ -192,7 +193,7 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
yield res
|
yield res
|
||||||
|
|
||||||
async for res in self._run(
|
async for res in self._run(
|
||||||
turn_id, session, input_messages, attachments, sampling_params, stream
|
session, turn_id, input_messages, attachments, sampling_params, stream
|
||||||
):
|
):
|
||||||
if isinstance(res, bool):
|
if isinstance(res, bool):
|
||||||
return
|
return
|
||||||
|
@ -358,7 +359,7 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
req = ChatCompletionRequest(
|
req = ChatCompletionRequest(
|
||||||
model=self.agent_config.model,
|
model=self.agent_config.model,
|
||||||
messages=input_messages,
|
messages=input_messages,
|
||||||
tools=self.agent_config.tools,
|
tools=self._get_tools(),
|
||||||
tool_prompt_format=self.agent_config.tool_prompt_format,
|
tool_prompt_format=self.agent_config.tool_prompt_format,
|
||||||
stream=True,
|
stream=True,
|
||||||
sampling_params=sampling_params,
|
sampling_params=sampling_params,
|
||||||
|
@ -555,17 +556,13 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
yield False
|
yield False
|
||||||
return
|
return
|
||||||
|
|
||||||
if isinstance(result_message.content, Attachment):
|
if out_attachment := interpret_content_as_attachment(
|
||||||
|
result_message.content
|
||||||
|
):
|
||||||
# NOTE: when we push this message back to the model, the model may ignore the
|
# NOTE: when we push this message back to the model, the model may ignore the
|
||||||
# attached file path etc. since the model is trained to only provide a user message
|
# attached file path etc. since the model is trained to only provide a user message
|
||||||
# with the summary. We keep all generated attachments and then attach them to final message
|
# with the summary. We keep all generated attachments and then attach them to final message
|
||||||
output_attachments.append(result_message.content)
|
output_attachments.append(out_attachment)
|
||||||
elif isinstance(result_message.content, list) or isinstance(
|
|
||||||
result_message.content, tuple
|
|
||||||
):
|
|
||||||
for c in result_message.content:
|
|
||||||
if isinstance(c, Attachment):
|
|
||||||
output_attachments.append(c)
|
|
||||||
|
|
||||||
input_messages = input_messages + [message, result_message]
|
input_messages = input_messages + [message, result_message]
|
||||||
|
|
||||||
|
@ -667,6 +664,27 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
"\n=== END-RETRIEVED-CONTEXT ===\n",
|
"\n=== END-RETRIEVED-CONTEXT ===\n",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _get_tools(self) -> List[ToolDefinition]:
|
||||||
|
ret = []
|
||||||
|
for t in self.agent_config.tools:
|
||||||
|
if isinstance(t, BraveSearchToolDefinition):
|
||||||
|
ret.append(ToolDefinition(tool_name=BuiltinTool.brave_search))
|
||||||
|
elif isinstance(t, WolframAlphaToolDefinition):
|
||||||
|
ret.append(ToolDefinition(tool_name=BuiltinTool.wolfram_alpha))
|
||||||
|
elif isinstance(t, PhotogenToolDefinition):
|
||||||
|
ret.append(ToolDefinition(tool_name=BuiltinTool.photogen))
|
||||||
|
elif isinstance(t, CodeInterpreterToolDefinition):
|
||||||
|
ret.append(ToolDefinition(tool_name=BuiltinTool.code_interpreter))
|
||||||
|
elif isinstance(t, FunctionCallToolDefinition):
|
||||||
|
ret.append(
|
||||||
|
ToolDefinition(
|
||||||
|
tool_name=t.function_name,
|
||||||
|
description=t.description,
|
||||||
|
parameters=t.parameters,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def attachment_message(urls: List[URL]) -> ToolResponseMessage:
|
def attachment_message(urls: List[URL]) -> ToolResponseMessage:
|
||||||
content = []
|
content = []
|
||||||
|
|
|
@ -12,7 +12,6 @@ from typing import AsyncGenerator, Dict
|
||||||
|
|
||||||
from llama_toolchain.distribution.datatypes import Api, ProviderSpec
|
from llama_toolchain.distribution.datatypes import Api, ProviderSpec
|
||||||
from llama_toolchain.inference.api import Inference
|
from llama_toolchain.inference.api import Inference
|
||||||
from llama_toolchain.inference.api.datatypes import BuiltinTool
|
|
||||||
from llama_toolchain.memory.api import Memory
|
from llama_toolchain.memory.api import Memory
|
||||||
from llama_toolchain.safety.api import Safety
|
from llama_toolchain.safety.api import Safety
|
||||||
from llama_toolchain.agentic_system.api import * # noqa: F403
|
from llama_toolchain.agentic_system.api import * # noqa: F403
|
||||||
|
@ -42,6 +41,7 @@ async def get_provider_impl(
|
||||||
impl = MetaReferenceAgenticSystemImpl(
|
impl = MetaReferenceAgenticSystemImpl(
|
||||||
config,
|
config,
|
||||||
deps[Api.inference],
|
deps[Api.inference],
|
||||||
|
deps[Api.memory],
|
||||||
deps[Api.safety],
|
deps[Api.safety],
|
||||||
)
|
)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
|
@ -56,54 +56,55 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
|
||||||
self,
|
self,
|
||||||
config: MetaReferenceImplConfig,
|
config: MetaReferenceImplConfig,
|
||||||
inference_api: Inference,
|
inference_api: Inference,
|
||||||
safety_api: Safety,
|
|
||||||
memory_api: Memory,
|
memory_api: Memory,
|
||||||
|
safety_api: Safety,
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.inference_api = inference_api
|
self.inference_api = inference_api
|
||||||
self.safety_api = safety_api
|
|
||||||
self.memory_api = memory_api
|
self.memory_api = memory_api
|
||||||
|
self.safety_api = safety_api
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def create_agentic_system(
|
async def create_agentic_system(
|
||||||
self,
|
self,
|
||||||
request: AgenticSystemCreateRequest,
|
agent_config: AgentConfig,
|
||||||
) -> AgenticSystemCreateResponse:
|
) -> AgenticSystemCreateResponse:
|
||||||
agent_id = str(uuid.uuid4())
|
agent_id = str(uuid.uuid4())
|
||||||
|
|
||||||
builtin_tools = []
|
builtin_tools = []
|
||||||
cfg = request.agent_config
|
for tool_defn in agent_config.tools:
|
||||||
for dfn in cfg.tools:
|
if isinstance(tool_defn, WolframAlphaToolDefinition):
|
||||||
if isinstance(dfn.tool_name, BuiltinTool):
|
key = self.config.wolfram_api_key
|
||||||
if dfn.tool_name == BuiltinTool.wolfram_alpha:
|
if not key:
|
||||||
key = self.config.wolfram_api_key
|
raise ValueError("Wolfram API key not defined in config")
|
||||||
if not key:
|
tool = WolframAlphaTool(key)
|
||||||
raise ValueError("Wolfram API key not defined in config")
|
elif isinstance(tool_defn, BraveSearchToolDefinition):
|
||||||
tool = WolframAlphaTool(key)
|
key = self.config.brave_search_api_key
|
||||||
elif dfn.tool_name == BuiltinTool.brave_search:
|
if not key:
|
||||||
key = self.config.brave_search_api_key
|
raise ValueError("Brave API key not defined in config")
|
||||||
if not key:
|
tool = BraveSearchTool(key)
|
||||||
raise ValueError("Brave API key not defined in config")
|
elif isinstance(tool_defn, CodeInterpreterToolDefinition):
|
||||||
tool = BraveSearchTool(key)
|
tool = CodeInterpreterTool()
|
||||||
elif dfn.tool_name == BuiltinTool.code_interpreter:
|
elif isinstance(tool_defn, PhotogenToolDefinition):
|
||||||
tool = CodeInterpreterTool()
|
tool = PhotogenTool(
|
||||||
elif dfn.tool_name == BuiltinTool.photogen:
|
dump_dir="/tmp/photogen_dump_" + os.environ["USER"],
|
||||||
tool = PhotogenTool(
|
|
||||||
dump_dir="/tmp/photogen_dump_" + os.environ["USER"],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown builtin tool: {dfn.tool_name}")
|
|
||||||
|
|
||||||
builtin_tools.append(
|
|
||||||
with_safety(
|
|
||||||
tool, self.safety_api, dfn.input_shields, dfn.output_shields
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
builtin_tools.append(
|
||||||
|
with_safety(
|
||||||
|
tool,
|
||||||
|
self.safety_api,
|
||||||
|
tool_defn.input_shields,
|
||||||
|
tool_defn.output_shields,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
AGENT_INSTANCES_BY_ID[agent_id] = ChatAgent(
|
AGENT_INSTANCES_BY_ID[agent_id] = ChatAgent(
|
||||||
agent_config=cfg,
|
agent_config=agent_config,
|
||||||
inference_api=self.inference_api,
|
inference_api=self.inference_api,
|
||||||
safety_api=self.safety_api,
|
safety_api=self.safety_api,
|
||||||
memory_api=self.memory_api,
|
memory_api=self.memory_api,
|
||||||
|
@ -116,13 +117,13 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
|
||||||
|
|
||||||
async def create_agentic_system_session(
|
async def create_agentic_system_session(
|
||||||
self,
|
self,
|
||||||
request: AgenticSystemSessionCreateRequest,
|
agent_id: str,
|
||||||
|
session_name: str,
|
||||||
) -> AgenticSystemSessionCreateResponse:
|
) -> AgenticSystemSessionCreateResponse:
|
||||||
agent_id = request.agent_id
|
|
||||||
assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
|
assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
|
||||||
agent = AGENT_INSTANCES_BY_ID[agent_id]
|
agent = AGENT_INSTANCES_BY_ID[agent_id]
|
||||||
|
|
||||||
session = agent.create_session(request.session_name)
|
session = agent.create_session(session_name)
|
||||||
return AgenticSystemSessionCreateResponse(
|
return AgenticSystemSessionCreateResponse(
|
||||||
session_id=session.session_id,
|
session_id=session.session_id,
|
||||||
)
|
)
|
||||||
|
|
|
@ -52,12 +52,37 @@ def available_distribution_specs() -> List[DistributionSpec]:
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
DistributionSpec(
|
DistributionSpec(
|
||||||
spec_id="test-memory",
|
spec_id="test-agentic",
|
||||||
description="Just a test distribution spec for testing memory bank APIs",
|
description="Test agentic with others as remote",
|
||||||
provider_specs={
|
provider_specs={
|
||||||
|
Api.agentic_system: providers[Api.agentic_system]["meta-reference"],
|
||||||
|
Api.inference: remote_spec(Api.inference),
|
||||||
|
Api.memory: remote_spec(Api.memory),
|
||||||
|
Api.safety: remote_spec(Api.safety),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
DistributionSpec(
|
||||||
|
spec_id="test-inference",
|
||||||
|
description="Test inference provider",
|
||||||
|
provider_specs={
|
||||||
|
Api.inference: providers[Api.inference]["meta-reference"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
DistributionSpec(
|
||||||
|
spec_id="test-memory",
|
||||||
|
description="Test memory provider",
|
||||||
|
provider_specs={
|
||||||
|
Api.inference: providers[Api.inference]["meta-reference"],
|
||||||
Api.memory: providers[Api.memory]["meta-reference-faiss"],
|
Api.memory: providers[Api.memory]["meta-reference-faiss"],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
DistributionSpec(
|
||||||
|
spec_id="test-safety",
|
||||||
|
description="Test safety provider",
|
||||||
|
provider_specs={
|
||||||
|
Api.safety: providers[Api.safety]["meta-reference"],
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -214,7 +214,9 @@ def create_dynamic_typed_route(func: Any, method: str):
|
||||||
# and some in the body
|
# and some in the body
|
||||||
endpoint.__signature__ = sig.replace(
|
endpoint.__signature__ = sig.replace(
|
||||||
parameters=[
|
parameters=[
|
||||||
param.replace(annotation=Annotated[param.annotation, Body()])
|
param.replace(
|
||||||
|
annotation=Annotated[param.annotation, Body(..., embed=True)]
|
||||||
|
)
|
||||||
for param in sig.parameters.values()
|
for param in sig.parameters.values()
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
@ -10,6 +10,7 @@ from typing import AsyncGenerator
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import httpx
|
import httpx
|
||||||
|
from pydantic import BaseModel
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from .api import (
|
from .api import (
|
||||||
|
@ -27,6 +28,10 @@ async def get_client_impl(base_url: str):
|
||||||
return InferenceClient(base_url)
|
return InferenceClient(base_url)
|
||||||
|
|
||||||
|
|
||||||
|
def encodable_dict(d: BaseModel):
|
||||||
|
return json.loads(d.json())
|
||||||
|
|
||||||
|
|
||||||
class InferenceClient(Inference):
|
class InferenceClient(Inference):
|
||||||
def __init__(self, base_url: str):
|
def __init__(self, base_url: str):
|
||||||
print(f"Initializing client for {base_url}")
|
print(f"Initializing client for {base_url}")
|
||||||
|
@ -46,7 +51,9 @@ class InferenceClient(Inference):
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{self.base_url}/inference/chat_completion",
|
f"{self.base_url}/inference/chat_completion",
|
||||||
data=request.json(),
|
json={
|
||||||
|
"request": encodable_dict(request),
|
||||||
|
},
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
timeout=20,
|
timeout=20,
|
||||||
) as response:
|
) as response:
|
||||||
|
|
|
@ -160,7 +160,6 @@ class FaissMemoryImpl(Memory):
|
||||||
config: MemoryBankConfig,
|
config: MemoryBankConfig,
|
||||||
url: Optional[URL] = None,
|
url: Optional[URL] = None,
|
||||||
) -> MemoryBank:
|
) -> MemoryBank:
|
||||||
print("Creating memory bank")
|
|
||||||
assert url is None, "URL is not supported for this implementation"
|
assert url is None, "URL is not supported for this implementation"
|
||||||
assert (
|
assert (
|
||||||
config.type == MemoryBankType.vector.value
|
config.type == MemoryBankType.vector.value
|
||||||
|
|
|
@ -10,6 +10,7 @@ import fire
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import UserMessage
|
from llama_models.llama3.api.datatypes import UserMessage
|
||||||
|
from pydantic import BaseModel
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from .api import (
|
from .api import (
|
||||||
|
@ -25,6 +26,10 @@ async def get_client_impl(base_url: str):
|
||||||
return SafetyClient(base_url)
|
return SafetyClient(base_url)
|
||||||
|
|
||||||
|
|
||||||
|
def encodable_dict(d: BaseModel):
|
||||||
|
return json.loads(d.json())
|
||||||
|
|
||||||
|
|
||||||
class SafetyClient(Safety):
|
class SafetyClient(Safety):
|
||||||
def __init__(self, base_url: str):
|
def __init__(self, base_url: str):
|
||||||
print(f"Initializing client for {base_url}")
|
print(f"Initializing client for {base_url}")
|
||||||
|
@ -40,7 +45,9 @@ class SafetyClient(Safety):
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{self.base_url}/safety/run_shields",
|
f"{self.base_url}/safety/run_shields",
|
||||||
data=request.json(),
|
json={
|
||||||
|
"request": encodable_dict(request),
|
||||||
|
},
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
timeout=20,
|
timeout=20,
|
||||||
)
|
)
|
||||||
|
|
|
@ -22,6 +22,7 @@ from .ipython_tool.code_execution import (
|
||||||
)
|
)
|
||||||
|
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_toolchain.inference.api import * # noqa: F403
|
||||||
|
from llama_toolchain.agentic_system.api import * # noqa: F403
|
||||||
|
|
||||||
from .base import BaseTool
|
from .base import BaseTool
|
||||||
|
|
||||||
|
@ -55,9 +56,6 @@ class SingleMessageBuiltinTool(BaseTool):
|
||||||
tool_name=tool_call.tool_name,
|
tool_name=tool_call.tool_name,
|
||||||
content=response,
|
content=response,
|
||||||
)
|
)
|
||||||
if attachment := interpret_content_as_attachment(response):
|
|
||||||
message.content = attachment
|
|
||||||
|
|
||||||
return [message]
|
return [message]
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -316,7 +314,4 @@ class CodeInterpreterTool(BaseTool):
|
||||||
tool_name=tool_call.tool_name,
|
tool_name=tool_call.tool_name,
|
||||||
content="\n".join(pieces),
|
content="\n".join(pieces),
|
||||||
)
|
)
|
||||||
if attachment := interpret_content_as_attachment(res["stdout"]):
|
|
||||||
message.content = attachment
|
|
||||||
|
|
||||||
return [message]
|
return [message]
|
||||||
|
|
|
@ -12,9 +12,6 @@ from typing import Dict, List
|
||||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from llama_toolchain.agentic_system.api import * # noqa: F403
|
from llama_toolchain.agentic_system.api import * # noqa: F403
|
||||||
|
|
||||||
# TODO: this is symptomatic of us needing to pull more tooling related utilities
|
|
||||||
from llama_toolchain.tools.builtin import interpret_content_as_attachment
|
|
||||||
|
|
||||||
|
|
||||||
class CustomTool:
|
class CustomTool:
|
||||||
"""
|
"""
|
||||||
|
@ -94,9 +91,6 @@ class SingleMessageCustomTool(CustomTool):
|
||||||
tool_name=tool_call.tool_name,
|
tool_name=tool_call.tool_name,
|
||||||
content=response_str,
|
content=response_str,
|
||||||
)
|
)
|
||||||
if attachment := interpret_content_as_attachment(response_str):
|
|
||||||
message.content = attachment
|
|
||||||
|
|
||||||
return [message]
|
return [message]
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue