mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-02 01:44:31 +00:00
Update Strategy in SamplingParams to be a union
This commit is contained in:
parent
300e6e2702
commit
dea575c994
28 changed files with 600 additions and 377 deletions
|
|
@ -26,27 +26,28 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import asyncio\n",
|
||||
"import nest_asyncio\n",
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"from typing import Dict, List\n",
|
||||
"\n",
|
||||
"import nest_asyncio\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
|
||||
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
|
||||
"from llama_stack_client.types import CompletionMessage\n",
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types import CompletionMessage\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
|
||||
"\n",
|
||||
"# Allow asyncio to run in Jupyter Notebook\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"HOST='localhost'\n",
|
||||
"PORT=5001\n",
|
||||
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
|
||||
"HOST = \"localhost\"\n",
|
||||
"PORT = 5001\n",
|
||||
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -69,7 +70,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv()\n",
|
||||
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
|
||||
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -118,7 +119,7 @@
|
|||
" cleaned = {k: v for k, v in results[idx].items() if k in selected_keys}\n",
|
||||
" clean_response.append(cleaned)\n",
|
||||
"\n",
|
||||
" return {\"query\": query, \"top_k\": clean_response}"
|
||||
" return {\"query\": query, \"top_k\": clean_response}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -157,25 +158,29 @@
|
|||
" for message in messages:\n",
|
||||
" if isinstance(message, CompletionMessage) and message.tool_calls:\n",
|
||||
" for tool_call in message.tool_calls:\n",
|
||||
" if 'query' in tool_call.arguments:\n",
|
||||
" query = tool_call.arguments['query']\n",
|
||||
" if \"query\" in tool_call.arguments:\n",
|
||||
" query = tool_call.arguments[\"query\"]\n",
|
||||
" call_id = tool_call.call_id\n",
|
||||
"\n",
|
||||
" if query:\n",
|
||||
" search_result = await self.run_impl(query)\n",
|
||||
" return [ToolResponseMessage(\n",
|
||||
" call_id=call_id,\n",
|
||||
" role=\"ipython\",\n",
|
||||
" content=self._format_response_for_agent(search_result),\n",
|
||||
" tool_name=\"brave_search\"\n",
|
||||
" )]\n",
|
||||
" return [\n",
|
||||
" ToolResponseMessage(\n",
|
||||
" call_id=call_id,\n",
|
||||
" role=\"ipython\",\n",
|
||||
" content=self._format_response_for_agent(search_result),\n",
|
||||
" tool_name=\"brave_search\",\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" return [ToolResponseMessage(\n",
|
||||
" call_id=\"no_call_id\",\n",
|
||||
" role=\"ipython\",\n",
|
||||
" content=\"No query provided.\",\n",
|
||||
" tool_name=\"brave_search\"\n",
|
||||
" )]\n",
|
||||
" return [\n",
|
||||
" ToolResponseMessage(\n",
|
||||
" call_id=\"no_call_id\",\n",
|
||||
" role=\"ipython\",\n",
|
||||
" content=\"No query provided.\",\n",
|
||||
" tool_name=\"brave_search\",\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" def _format_response_for_agent(self, search_result):\n",
|
||||
" parsed_result = json.loads(search_result)\n",
|
||||
|
|
@ -186,7 +191,7 @@
|
|||
" f\" URL: {result.get('url', 'No URL')}\\n\"\n",
|
||||
" f\" Description: {result.get('description', 'No Description')}\\n\\n\"\n",
|
||||
" )\n",
|
||||
" return formatted_result"
|
||||
" return formatted_result\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -209,7 +214,7 @@
|
|||
"async def execute_search(query: str):\n",
|
||||
" web_search_tool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
|
||||
" result = await web_search_tool.run_impl(query)\n",
|
||||
" print(\"Search Results:\", result)"
|
||||
" print(\"Search Results:\", result)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -236,7 +241,7 @@
|
|||
],
|
||||
"source": [
|
||||
"query = \"Latest developments in quantum computing\"\n",
|
||||
"asyncio.run(execute_search(query))"
|
||||
"asyncio.run(execute_search(query))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -288,19 +293,17 @@
|
|||
"\n",
|
||||
" # Initialize custom tool (ensure `WebSearchTool` is defined earlier in the notebook)\n",
|
||||
" webSearchTool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # Define the agent configuration, including the model and tool setup\n",
|
||||
" agent_config = AgentConfig(\n",
|
||||
" model=MODEL_NAME,\n",
|
||||
" instructions=\"\"\"You are a helpful assistant that responds to user queries with relevant information and cites sources when available.\"\"\",\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": \"greedy\",\n",
|
||||
" \"temperature\": 1.0,\n",
|
||||
" \"top_p\": 0.9,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" tools=[\n",
|
||||
" webSearchTool.get_tool_definition()\n",
|
||||
" ],\n",
|
||||
" tools=[webSearchTool.get_tool_definition()],\n",
|
||||
" tool_choice=\"auto\",\n",
|
||||
" tool_prompt_format=\"python_list\",\n",
|
||||
" input_shields=input_shields,\n",
|
||||
|
|
@ -329,8 +332,9 @@
|
|||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Run the function asynchronously in a Jupyter Notebook cell\n",
|
||||
"await run_main(disable_safety=True)"
|
||||
"await run_main(disable_safety=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
|
|
@ -50,8 +50,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -60,10 +60,12 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
|
||||
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -104,20 +106,22 @@
|
|||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def agent_example():\n",
|
||||
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
|
||||
" agent_config = AgentConfig(\n",
|
||||
" model=MODEL_NAME,\n",
|
||||
" instructions=\"You are a helpful assistant! If you call builtin tools like brave search, follow the syntax brave_search.call(…)\",\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": \"greedy\",\n",
|
||||
" \"temperature\": 1.0,\n",
|
||||
" \"top_p\": 0.9,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" tools=[\n",
|
||||
" {\n",
|
||||
|
|
@ -157,7 +161,7 @@
|
|||
" log.print()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"await agent_example()"
|
||||
"await agent_example()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -157,7 +157,15 @@ curl http://localhost:$LLAMA_STACK_PORT/alpha/inference/chat-completion
|
|||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Write me a 2-sentence poem about the moon"}
|
||||
],
|
||||
"sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512}
|
||||
"sampling_params": {
|
||||
"strategy": {
|
||||
"type": "top_p",
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95
|
||||
},
|
||||
"seed": 42,
|
||||
"max_tokens": 512
|
||||
}
|
||||
}
|
||||
EOF
|
||||
```
|
||||
|
|
|
|||
|
|
@ -83,8 +83,8 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
|
||||
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\""
|
||||
"LLAMA_STACK_API_TOGETHER_URL = \"https://llama-stack.together.ai\"\n",
|
||||
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -107,12 +107,13 @@
|
|||
" AgentConfigToolSearchToolDefinition,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Helper function to create an agent with tools\n",
|
||||
"async def create_tool_agent(\n",
|
||||
" client: LlamaStackClient,\n",
|
||||
" tools: List[Dict],\n",
|
||||
" instructions: str = \"You are a helpful assistant\",\n",
|
||||
" model: str = LLAMA31_8B_INSTRUCT\n",
|
||||
" model: str = LLAMA31_8B_INSTRUCT,\n",
|
||||
") -> Agent:\n",
|
||||
" \"\"\"Create an agent with specified tools.\"\"\"\n",
|
||||
" print(\"Using the following model: \", model)\n",
|
||||
|
|
@ -120,9 +121,9 @@
|
|||
" model=model,\n",
|
||||
" instructions=instructions,\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": \"greedy\",\n",
|
||||
" \"temperature\": 1.0,\n",
|
||||
" \"top_p\": 0.9,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice=\"auto\",\n",
|
||||
|
|
@ -130,7 +131,7 @@
|
|||
" enable_session_persistence=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" return Agent(client, agent_config)"
|
||||
" return Agent(client, agent_config)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -172,7 +173,8 @@
|
|||
],
|
||||
"source": [
|
||||
"# comment this if you don't have a BRAVE_SEARCH_API_KEY\n",
|
||||
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = 'YOUR_BRAVE_SEARCH_API_KEY'\n",
|
||||
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"YOUR_BRAVE_SEARCH_API_KEY\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def create_search_agent(client: LlamaStackClient) -> Agent:\n",
|
||||
" \"\"\"Create an agent with Brave Search capability.\"\"\"\n",
|
||||
|
|
@ -186,8 +188,8 @@
|
|||
"\n",
|
||||
" return await create_tool_agent(\n",
|
||||
" client=client,\n",
|
||||
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
|
||||
" model = LLAMA31_8B_INSTRUCT,\n",
|
||||
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
|
||||
" model=LLAMA31_8B_INSTRUCT,\n",
|
||||
" instructions=\"\"\"\n",
|
||||
" You are a research assistant that can search the web.\n",
|
||||
" Always cite your sources with URLs when providing information.\n",
|
||||
|
|
@ -198,9 +200,10 @@
|
|||
"\n",
|
||||
" SOURCES:\n",
|
||||
" - [Source title](URL)\n",
|
||||
" \"\"\"\n",
|
||||
" \"\"\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Example usage\n",
|
||||
"async def search_example():\n",
|
||||
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
|
||||
|
|
@ -212,7 +215,7 @@
|
|||
" # Example queries\n",
|
||||
" queries = [\n",
|
||||
" \"What are the latest developments in quantum computing?\",\n",
|
||||
" #\"Who won the most recent Super Bowl?\",\n",
|
||||
" # \"Who won the most recent Super Bowl?\",\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" for query in queries:\n",
|
||||
|
|
@ -227,8 +230,9 @@
|
|||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Run the example (in Jupyter, use asyncio.run())\n",
|
||||
"await search_example()"
|
||||
"await search_example()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -286,12 +290,16 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import TypedDict, Optional, Dict, Any\n",
|
||||
"from datetime import datetime\n",
|
||||
"import json\n",
|
||||
"from llama_stack_client.types.tool_param_definition_param import ToolParamDefinitionParam\n",
|
||||
"from llama_stack_client.types import CompletionMessage,ToolResponseMessage\n",
|
||||
"from datetime import datetime\n",
|
||||
"from typing import Any, Dict, Optional, TypedDict\n",
|
||||
"\n",
|
||||
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
|
||||
"from llama_stack_client.types import CompletionMessage, ToolResponseMessage\n",
|
||||
"from llama_stack_client.types.tool_param_definition_param import (\n",
|
||||
" ToolParamDefinitionParam,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class WeatherTool(CustomTool):\n",
|
||||
" \"\"\"Example custom tool for weather information.\"\"\"\n",
|
||||
|
|
@ -305,16 +313,15 @@
|
|||
" def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]:\n",
|
||||
" return {\n",
|
||||
" \"location\": ToolParamDefinitionParam(\n",
|
||||
" param_type=\"str\",\n",
|
||||
" description=\"City or location name\",\n",
|
||||
" required=True\n",
|
||||
" param_type=\"str\", description=\"City or location name\", required=True\n",
|
||||
" ),\n",
|
||||
" \"date\": ToolParamDefinitionParam(\n",
|
||||
" param_type=\"str\",\n",
|
||||
" description=\"Optional date (YYYY-MM-DD)\",\n",
|
||||
" required=False\n",
|
||||
" )\n",
|
||||
" required=False,\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:\n",
|
||||
" assert len(messages) == 1, \"Expected single message\"\n",
|
||||
"\n",
|
||||
|
|
@ -337,20 +344,14 @@
|
|||
" )\n",
|
||||
" return [message]\n",
|
||||
"\n",
|
||||
" async def run_impl(self, location: str, date: Optional[str] = None) -> Dict[str, Any]:\n",
|
||||
" async def run_impl(\n",
|
||||
" self, location: str, date: Optional[str] = None\n",
|
||||
" ) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Simulate getting weather data (replace with actual API call).\"\"\"\n",
|
||||
" # Mock implementation\n",
|
||||
" if date:\n",
|
||||
" return {\n",
|
||||
" \"temperature\": 90.1,\n",
|
||||
" \"conditions\": \"sunny\",\n",
|
||||
" \"humidity\": 40.0\n",
|
||||
" }\n",
|
||||
" return {\n",
|
||||
" \"temperature\": 72.5,\n",
|
||||
" \"conditions\": \"partly cloudy\",\n",
|
||||
" \"humidity\": 65.0\n",
|
||||
" }\n",
|
||||
" return {\"temperature\": 90.1, \"conditions\": \"sunny\", \"humidity\": 40.0}\n",
|
||||
" return {\"temperature\": 72.5, \"conditions\": \"partly cloudy\", \"humidity\": 65.0}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def create_weather_agent(client: LlamaStackClient) -> Agent:\n",
|
||||
|
|
@ -358,38 +359,33 @@
|
|||
"\n",
|
||||
" # Create the agent with the tool\n",
|
||||
" weather_tool = WeatherTool()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" agent_config = AgentConfig(\n",
|
||||
" model=LLAMA31_8B_INSTRUCT,\n",
|
||||
" #model=model_name,\n",
|
||||
" # model=model_name,\n",
|
||||
" instructions=\"\"\"\n",
|
||||
" You are a weather assistant that can provide weather information.\n",
|
||||
" Always specify the location clearly in your responses.\n",
|
||||
" Include both temperature and conditions in your summaries.\n",
|
||||
" \"\"\",\n",
|
||||
" sampling_params={\n",
|
||||
" \"strategy\": \"greedy\",\n",
|
||||
" \"temperature\": 1.0,\n",
|
||||
" \"top_p\": 0.9,\n",
|
||||
" \"strategy\": {\n",
|
||||
" \"type\": \"greedy\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" tools=[\n",
|
||||
" weather_tool.get_tool_definition()\n",
|
||||
" ],\n",
|
||||
" tools=[weather_tool.get_tool_definition()],\n",
|
||||
" tool_choice=\"auto\",\n",
|
||||
" tool_prompt_format=\"json\",\n",
|
||||
" input_shields=[],\n",
|
||||
" output_shields=[],\n",
|
||||
" enable_session_persistence=True\n",
|
||||
" enable_session_persistence=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" agent = Agent(\n",
|
||||
" client=client,\n",
|
||||
" agent_config=agent_config,\n",
|
||||
" custom_tools=[weather_tool]\n",
|
||||
" )\n",
|
||||
" agent = Agent(client=client, agent_config=agent_config, custom_tools=[weather_tool])\n",
|
||||
"\n",
|
||||
" return agent\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Example usage\n",
|
||||
"async def weather_example():\n",
|
||||
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
|
||||
|
|
@ -413,12 +409,14 @@
|
|||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# For Jupyter notebooks\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"# Run the example\n",
|
||||
"await weather_example()"
|
||||
"await weather_example()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue