Update Strategy in SamplingParams to be a union

Hardik Shah 2025-01-14 15:56:02 -08:00 committed by Ashwin Bharambe
parent 300e6e2702
commit dea575c994
28 changed files with 600 additions and 377 deletions
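
Summary of the change: the flat `strategy` / `temperature` / `top_p` fields in `sampling_params` are replaced by a single `strategy` object discriminated by `"type"`. A minimal sketch of the before/after shapes, using only the dict forms that appear in the hunks below (not the library's typed classes, which this excerpt does not show):

```python
# Before this commit: strategy was a bare string, with the sampling
# knobs sitting alongside it at the top level of sampling_params.
old_sampling_params = {
    "strategy": "greedy",
    "temperature": 1.0,
    "top_p": 0.9,
}

# After: strategy is a tagged union; each variant carries only the
# knobs that apply to it.
greedy_params = {
    "strategy": {"type": "greedy"},
}
top_p_params = {
    "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
    "seed": 42,
    "max_tokens": 512,
}
```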

View file

@@ -26,27 +26,28 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"import asyncio\n",
"import nest_asyncio\n",
"import json\n",
"import os\n",
"from typing import Dict, List\n",
"\n",
"import nest_asyncio\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
"\n",
"# Allow asyncio to run in Jupyter Notebook\n",
"nest_asyncio.apply()\n",
"\n",
"HOST='localhost'\n",
"PORT=5001\n",
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
"HOST = \"localhost\"\n",
"PORT = 5001\n",
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
]
},
{
@@ -69,7 +70,7 @@
"outputs": [],
"source": [
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
]
},
{
@@ -118,7 +119,7 @@
" cleaned = {k: v for k, v in results[idx].items() if k in selected_keys}\n",
" clean_response.append(cleaned)\n",
"\n",
" return {\"query\": query, \"top_k\": clean_response}"
" return {\"query\": query, \"top_k\": clean_response}\n"
]
},
{
@@ -157,25 +158,29 @@
" for message in messages:\n",
" if isinstance(message, CompletionMessage) and message.tool_calls:\n",
" for tool_call in message.tool_calls:\n",
" if 'query' in tool_call.arguments:\n",
" query = tool_call.arguments['query']\n",
" if \"query\" in tool_call.arguments:\n",
" query = tool_call.arguments[\"query\"]\n",
" call_id = tool_call.call_id\n",
"\n",
" if query:\n",
" search_result = await self.run_impl(query)\n",
" return [ToolResponseMessage(\n",
" call_id=call_id,\n",
" role=\"ipython\",\n",
" content=self._format_response_for_agent(search_result),\n",
" tool_name=\"brave_search\"\n",
" )]\n",
" return [\n",
" ToolResponseMessage(\n",
" call_id=call_id,\n",
" role=\"ipython\",\n",
" content=self._format_response_for_agent(search_result),\n",
" tool_name=\"brave_search\",\n",
" )\n",
" ]\n",
"\n",
" return [ToolResponseMessage(\n",
" call_id=\"no_call_id\",\n",
" role=\"ipython\",\n",
" content=\"No query provided.\",\n",
" tool_name=\"brave_search\"\n",
" )]\n",
" return [\n",
" ToolResponseMessage(\n",
" call_id=\"no_call_id\",\n",
" role=\"ipython\",\n",
" content=\"No query provided.\",\n",
" tool_name=\"brave_search\",\n",
" )\n",
" ]\n",
"\n",
" def _format_response_for_agent(self, search_result):\n",
" parsed_result = json.loads(search_result)\n",
@@ -186,7 +191,7 @@
" f\" URL: {result.get('url', 'No URL')}\\n\"\n",
" f\" Description: {result.get('description', 'No Description')}\\n\\n\"\n",
" )\n",
" return formatted_result"
" return formatted_result\n"
]
},
{
@@ -209,7 +214,7 @@
"async def execute_search(query: str):\n",
" web_search_tool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
" result = await web_search_tool.run_impl(query)\n",
" print(\"Search Results:\", result)"
" print(\"Search Results:\", result)\n"
]
},
{
@@ -236,7 +241,7 @@
],
"source": [
"query = \"Latest developments in quantum computing\"\n",
"asyncio.run(execute_search(query))"
"asyncio.run(execute_search(query))\n"
]
},
{
@@ -288,19 +293,17 @@
"\n",
" # Initialize custom tool (ensure `WebSearchTool` is defined earlier in the notebook)\n",
" webSearchTool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
" \n",
"\n",
" # Define the agent configuration, including the model and tool setup\n",
" agent_config = AgentConfig(\n",
" model=MODEL_NAME,\n",
" instructions=\"\"\"You are a helpful assistant that responds to user queries with relevant information and cites sources when available.\"\"\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" webSearchTool.get_tool_definition()\n",
" ],\n",
" tools=[webSearchTool.get_tool_definition()],\n",
" tool_choice=\"auto\",\n",
" tool_prompt_format=\"python_list\",\n",
" input_shields=input_shields,\n",
@@ -329,8 +332,9 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# Run the function asynchronously in a Jupyter Notebook cell\n",
"await run_main(disable_safety=True)"
"await run_main(disable_safety=True)\n"
]
}
],

View file

@@ -50,8 +50,8 @@
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port\n",
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
"PORT = 5001 # Replace with your port\n",
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
]
},
{
@@ -60,10 +60,12 @@
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
]
},
{
@@ -104,20 +106,22 @@
],
"source": [
"import os\n",
"\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"\n",
"\n",
"async def agent_example():\n",
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
" agent_config = AgentConfig(\n",
" model=MODEL_NAME,\n",
" instructions=\"You are a helpful assistant! If you call builtin tools like brave search, follow the syntax brave_search.call(…)\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" {\n",
@@ -157,7 +161,7 @@
" log.print()\n",
"\n",
"\n",
"await agent_example()"
"await agent_example()\n"
]
},
{

View file

@@ -157,7 +157,15 @@ curl http://localhost:$LLAMA_STACK_PORT/alpha/inference/chat-completion
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Write me a 2-sentence poem about the moon"}
],
"sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512}
"sampling_params": {
"strategy": {
"type": "top_p",
"temperatrue": 0.7,
"top_p": 0.95,
},
"seed": 42,
"max_tokens": 512
}
}
EOF
```
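
Only the `top_p` variant appears in this doc example; the notebooks in this commit use the `greedy` variant, which takes no extra fields. A sketch of the greedy counterpart of the same `sampling_params`, mirroring the hunk above:

```python
# Hypothetical greedy counterpart of the request body above; seed and
# max_tokens remain top-level siblings of strategy, per the hunk.
sampling_params = {
    "strategy": {"type": "greedy"},
    "seed": 42,
    "max_tokens": 512,
}
```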

View file

@@ -83,8 +83,8 @@
},
"outputs": [],
"source": [
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\""
"LLAMA_STACK_API_TOGETHER_URL = \"https://llama-stack.together.ai\"\n",
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\"\n"
]
},
{
@@ -107,12 +107,13 @@
" AgentConfigToolSearchToolDefinition,\n",
")\n",
"\n",
"\n",
"# Helper function to create an agent with tools\n",
"async def create_tool_agent(\n",
" client: LlamaStackClient,\n",
" tools: List[Dict],\n",
" instructions: str = \"You are a helpful assistant\",\n",
" model: str = LLAMA31_8B_INSTRUCT\n",
" model: str = LLAMA31_8B_INSTRUCT,\n",
") -> Agent:\n",
" \"\"\"Create an agent with specified tools.\"\"\"\n",
" print(\"Using the following model: \", model)\n",
@@ -120,9 +121,9 @@
" model=model,\n",
" instructions=instructions,\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=tools,\n",
" tool_choice=\"auto\",\n",
@@ -130,7 +131,7 @@
" enable_session_persistence=True,\n",
" )\n",
"\n",
" return Agent(client, agent_config)"
" return Agent(client, agent_config)\n"
]
},
{
@@ -172,7 +173,8 @@
],
"source": [
"# comment this if you don't have a BRAVE_SEARCH_API_KEY\n",
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = 'YOUR_BRAVE_SEARCH_API_KEY'\n",
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"YOUR_BRAVE_SEARCH_API_KEY\"\n",
"\n",
"\n",
"async def create_search_agent(client: LlamaStackClient) -> Agent:\n",
" \"\"\"Create an agent with Brave Search capability.\"\"\"\n",
@@ -186,8 +188,8 @@
"\n",
" return await create_tool_agent(\n",
" client=client,\n",
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
" model = LLAMA31_8B_INSTRUCT,\n",
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
" model=LLAMA31_8B_INSTRUCT,\n",
" instructions=\"\"\"\n",
" You are a research assistant that can search the web.\n",
" Always cite your sources with URLs when providing information.\n",
@@ -198,9 +200,10 @@
"\n",
" SOURCES:\n",
" - [Source title](URL)\n",
" \"\"\"\n",
" \"\"\",\n",
" )\n",
"\n",
"\n",
"# Example usage\n",
"async def search_example():\n",
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
@@ -212,7 +215,7 @@
" # Example queries\n",
" queries = [\n",
" \"What are the latest developments in quantum computing?\",\n",
" #\"Who won the most recent Super Bowl?\",\n",
" # \"Who won the most recent Super Bowl?\",\n",
" ]\n",
"\n",
" for query in queries:\n",
@@ -227,8 +230,9 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# Run the example (in Jupyter, use asyncio.run())\n",
"await search_example()"
"await search_example()\n"
]
},
{
@@ -286,12 +290,16 @@
}
],
"source": [
"from typing import TypedDict, Optional, Dict, Any\n",
"from datetime import datetime\n",
"import json\n",
"from llama_stack_client.types.tool_param_definition_param import ToolParamDefinitionParam\n",
"from llama_stack_client.types import CompletionMessage,ToolResponseMessage\n",
"from datetime import datetime\n",
"from typing import Any, Dict, Optional, TypedDict\n",
"\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.types import CompletionMessage, ToolResponseMessage\n",
"from llama_stack_client.types.tool_param_definition_param import (\n",
" ToolParamDefinitionParam,\n",
")\n",
"\n",
"\n",
"class WeatherTool(CustomTool):\n",
" \"\"\"Example custom tool for weather information.\"\"\"\n",
@@ -305,16 +313,15 @@
" def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]:\n",
" return {\n",
" \"location\": ToolParamDefinitionParam(\n",
" param_type=\"str\",\n",
" description=\"City or location name\",\n",
" required=True\n",
" param_type=\"str\", description=\"City or location name\", required=True\n",
" ),\n",
" \"date\": ToolParamDefinitionParam(\n",
" param_type=\"str\",\n",
" description=\"Optional date (YYYY-MM-DD)\",\n",
" required=False\n",
" )\n",
" required=False,\n",
" ),\n",
" }\n",
"\n",
" async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:\n",
" assert len(messages) == 1, \"Expected single message\"\n",
"\n",
@@ -337,20 +344,14 @@
" )\n",
" return [message]\n",
"\n",
" async def run_impl(self, location: str, date: Optional[str] = None) -> Dict[str, Any]:\n",
" async def run_impl(\n",
" self, location: str, date: Optional[str] = None\n",
" ) -> Dict[str, Any]:\n",
" \"\"\"Simulate getting weather data (replace with actual API call).\"\"\"\n",
" # Mock implementation\n",
" if date:\n",
" return {\n",
" \"temperature\": 90.1,\n",
" \"conditions\": \"sunny\",\n",
" \"humidity\": 40.0\n",
" }\n",
" return {\n",
" \"temperature\": 72.5,\n",
" \"conditions\": \"partly cloudy\",\n",
" \"humidity\": 65.0\n",
" }\n",
" return {\"temperature\": 90.1, \"conditions\": \"sunny\", \"humidity\": 40.0}\n",
" return {\"temperature\": 72.5, \"conditions\": \"partly cloudy\", \"humidity\": 65.0}\n",
"\n",
"\n",
"async def create_weather_agent(client: LlamaStackClient) -> Agent:\n",
@@ -358,38 +359,33 @@
"\n",
" # Create the agent with the tool\n",
" weather_tool = WeatherTool()\n",
" \n",
"\n",
" agent_config = AgentConfig(\n",
" model=LLAMA31_8B_INSTRUCT,\n",
" #model=model_name,\n",
" # model=model_name,\n",
" instructions=\"\"\"\n",
" You are a weather assistant that can provide weather information.\n",
" Always specify the location clearly in your responses.\n",
" Include both temperature and conditions in your summaries.\n",
" \"\"\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" weather_tool.get_tool_definition()\n",
" ],\n",
" tools=[weather_tool.get_tool_definition()],\n",
" tool_choice=\"auto\",\n",
" tool_prompt_format=\"json\",\n",
" input_shields=[],\n",
" output_shields=[],\n",
" enable_session_persistence=True\n",
" enable_session_persistence=True,\n",
" )\n",
"\n",
" agent = Agent(\n",
" client=client,\n",
" agent_config=agent_config,\n",
" custom_tools=[weather_tool]\n",
" )\n",
" agent = Agent(client=client, agent_config=agent_config, custom_tools=[weather_tool])\n",
"\n",
" return agent\n",
"\n",
"\n",
"# Example usage\n",
"async def weather_example():\n",
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
@@ -413,12 +409,14 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# For Jupyter notebooks\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"# Run the example\n",
"await weather_example()"
"await weather_example()\n"
]
},
{