Update Strategy in SamplingParams to be a union

Hardik Shah 2025-01-14 15:56:02 -08:00 committed by Ashwin Bharambe
parent 300e6e2702
commit dea575c994
28 changed files with 600 additions and 377 deletions
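
Summary of the change: the flat `strategy` / `temperature` / `top_p` fields in `sampling_params` are replaced by a single `strategy` object discriminated by `"type"`. A minimal sketch of the before/after shapes, using only the dict forms that appear in the hunks below (not the library's typed classes, which this excerpt does not show):

```python
# Before this commit: strategy was a bare string, with the sampling
# knobs sitting alongside it at the top level of sampling_params.
old_sampling_params = {
    "strategy": "greedy",
    "temperature": 1.0,
    "top_p": 0.9,
}

# After: strategy is a tagged union; each variant carries only the
# knobs that apply to it.
greedy_params = {
    "strategy": {"type": "greedy"},
}
top_p_params = {
    "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
    "seed": 42,
    "max_tokens": 512,
}
```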

View file

@@ -26,27 +26,28 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"import asyncio\n",
"import nest_asyncio\n",
"import json\n",
"import os\n",
"from typing import Dict, List\n",
"\n",
"import nest_asyncio\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage\n",
"\n",
"# Allow asyncio to run in Jupyter Notebook\n",
"nest_asyncio.apply()\n",
"\n",
"HOST='localhost'\n",
"PORT=5001\n",
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
"HOST = \"localhost\"\n",
"PORT = 5001\n",
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
]
},
{
@@ -69,7 +70,7 @@
"outputs": [],
"source": [
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
]
},
{
@@ -118,7 +119,7 @@
" cleaned = {k: v for k, v in results[idx].items() if k in selected_keys}\n",
" clean_response.append(cleaned)\n",
"\n",
" return {\"query\": query, \"top_k\": clean_response}"
" return {\"query\": query, \"top_k\": clean_response}\n"
]
},
{
@@ -157,25 +158,29 @@
" for message in messages:\n",
" if isinstance(message, CompletionMessage) and message.tool_calls:\n",
" for tool_call in message.tool_calls:\n",
" if 'query' in tool_call.arguments:\n",
" query = tool_call.arguments['query']\n",
" if \"query\" in tool_call.arguments:\n",
" query = tool_call.arguments[\"query\"]\n",
" call_id = tool_call.call_id\n",
"\n",
" if query:\n",
" search_result = await self.run_impl(query)\n",
" return [ToolResponseMessage(\n",
" call_id=call_id,\n",
" role=\"ipython\",\n",
" content=self._format_response_for_agent(search_result),\n",
" tool_name=\"brave_search\"\n",
" )]\n",
" return [\n",
" ToolResponseMessage(\n",
" call_id=call_id,\n",
" role=\"ipython\",\n",
" content=self._format_response_for_agent(search_result),\n",
" tool_name=\"brave_search\",\n",
" )\n",
" ]\n",
"\n",
" return [ToolResponseMessage(\n",
" call_id=\"no_call_id\",\n",
" role=\"ipython\",\n",
" content=\"No query provided.\",\n",
" tool_name=\"brave_search\"\n",
" )]\n",
" return [\n",
" ToolResponseMessage(\n",
" call_id=\"no_call_id\",\n",
" role=\"ipython\",\n",
" content=\"No query provided.\",\n",
" tool_name=\"brave_search\",\n",
" )\n",
" ]\n",
"\n",
" def _format_response_for_agent(self, search_result):\n",
" parsed_result = json.loads(search_result)\n",
@@ -186,7 +191,7 @@
" f\" URL: {result.get('url', 'No URL')}\\n\"\n",
" f\" Description: {result.get('description', 'No Description')}\\n\\n\"\n",
" )\n",
" return formatted_result"
" return formatted_result\n"
]
},
{
@@ -209,7 +214,7 @@
"async def execute_search(query: str):\n",
" web_search_tool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
" result = await web_search_tool.run_impl(query)\n",
" print(\"Search Results:\", result)"
" print(\"Search Results:\", result)\n"
]
},
{
@@ -236,7 +241,7 @@
],
"source": [
"query = \"Latest developments in quantum computing\"\n",
"asyncio.run(execute_search(query))"
"asyncio.run(execute_search(query))\n"
]
},
{
@@ -288,19 +293,17 @@
"\n",
" # Initialize custom tool (ensure `WebSearchTool` is defined earlier in the notebook)\n",
" webSearchTool = WebSearchTool(api_key=BRAVE_SEARCH_API_KEY)\n",
" \n",
"\n",
" # Define the agent configuration, including the model and tool setup\n",
" agent_config = AgentConfig(\n",
" model=MODEL_NAME,\n",
" instructions=\"\"\"You are a helpful assistant that responds to user queries with relevant information and cites sources when available.\"\"\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" webSearchTool.get_tool_definition()\n",
" ],\n",
" tools=[webSearchTool.get_tool_definition()],\n",
" tool_choice=\"auto\",\n",
" tool_prompt_format=\"python_list\",\n",
" input_shields=input_shields,\n",
@@ -329,8 +332,9 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# Run the function asynchronously in a Jupyter Notebook cell\n",
"await run_main(disable_safety=True)"
"await run_main(disable_safety=True)\n"
]
}
],

View file

@@ -50,8 +50,8 @@
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port\n",
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
"PORT = 5001 # Replace with your port\n",
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
]
},
{
@@ -60,10 +60,12 @@
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ['BRAVE_SEARCH_API_KEY']"
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
]
},
{
@@ -104,20 +106,22 @@
],
"source": [
"import os\n",
"\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"\n",
"\n",
"async def agent_example():\n",
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
" agent_config = AgentConfig(\n",
" model=MODEL_NAME,\n",
" instructions=\"You are a helpful assistant! If you call builtin tools like brave search, follow the syntax brave_search.call(…)\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" {\n",
@@ -157,7 +161,7 @@
" log.print()\n",
"\n",
"\n",
"await agent_example()"
"await agent_example()\n"
]
},
{

View file

@@ -157,7 +157,15 @@ curl http://localhost:$LLAMA_STACK_PORT/alpha/inference/chat-completion
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Write me a 2-sentence poem about the moon"}
],
"sampling_params": {"temperature": 0.7, "seed": 42, "max_tokens": 512}
"sampling_params": {
"strategy": {
"type": "top_p",
"temperatrue": 0.7,
"top_p": 0.95,
},
"seed": 42,
"max_tokens": 512
}
}
EOF
```
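
Only the `top_p` variant appears in this doc example; the notebooks in this commit use the `greedy` variant, which takes no extra fields. A sketch of the greedy counterpart of the same `sampling_params`, mirroring the hunk above:

```python
# Hypothetical greedy counterpart of the request body above; seed and
# max_tokens remain top-level siblings of strategy, per the hunk.
sampling_params = {
    "strategy": {"type": "greedy"},
    "seed": 42,
    "max_tokens": 512,
}
```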

View file

@@ -83,8 +83,8 @@
},
"outputs": [],
"source": [
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\""
"LLAMA_STACK_API_TOGETHER_URL = \"https://llama-stack.together.ai\"\n",
"LLAMA31_8B_INSTRUCT = \"Llama3.1-8B-Instruct\"\n"
]
},
{
@@ -107,12 +107,13 @@
" AgentConfigToolSearchToolDefinition,\n",
")\n",
"\n",
"\n",
"# Helper function to create an agent with tools\n",
"async def create_tool_agent(\n",
" client: LlamaStackClient,\n",
" tools: List[Dict],\n",
" instructions: str = \"You are a helpful assistant\",\n",
" model: str = LLAMA31_8B_INSTRUCT\n",
" model: str = LLAMA31_8B_INSTRUCT,\n",
") -> Agent:\n",
" \"\"\"Create an agent with specified tools.\"\"\"\n",
" print(\"Using the following model: \", model)\n",
@@ -120,9 +121,9 @@
" model=model,\n",
" instructions=instructions,\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=tools,\n",
" tool_choice=\"auto\",\n",
@@ -130,7 +131,7 @@
" enable_session_persistence=True,\n",
" )\n",
"\n",
" return Agent(client, agent_config)"
" return Agent(client, agent_config)\n"
]
},
{
@@ -172,7 +173,8 @@
],
"source": [
"# comment this if you don't have a BRAVE_SEARCH_API_KEY\n",
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = 'YOUR_BRAVE_SEARCH_API_KEY'\n",
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"YOUR_BRAVE_SEARCH_API_KEY\"\n",
"\n",
"\n",
"async def create_search_agent(client: LlamaStackClient) -> Agent:\n",
" \"\"\"Create an agent with Brave Search capability.\"\"\"\n",
@@ -186,8 +188,8 @@
"\n",
" return await create_tool_agent(\n",
" client=client,\n",
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
" model = LLAMA31_8B_INSTRUCT,\n",
" tools=[search_tool], # set this to [] if you don't have a BRAVE_SEARCH_API_KEY\n",
" model=LLAMA31_8B_INSTRUCT,\n",
" instructions=\"\"\"\n",
" You are a research assistant that can search the web.\n",
" Always cite your sources with URLs when providing information.\n",
@@ -198,9 +200,10 @@
"\n",
" SOURCES:\n",
" - [Source title](URL)\n",
" \"\"\"\n",
" \"\"\",\n",
" )\n",
"\n",
"\n",
"# Example usage\n",
"async def search_example():\n",
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
@@ -212,7 +215,7 @@
" # Example queries\n",
" queries = [\n",
" \"What are the latest developments in quantum computing?\",\n",
" #\"Who won the most recent Super Bowl?\",\n",
" # \"Who won the most recent Super Bowl?\",\n",
" ]\n",
"\n",
" for query in queries:\n",
@@ -227,8 +230,9 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# Run the example (in Jupyter, use asyncio.run())\n",
"await search_example()"
"await search_example()\n"
]
},
{
@@ -286,12 +290,16 @@
}
],
"source": [
"from typing import TypedDict, Optional, Dict, Any\n",
"from datetime import datetime\n",
"import json\n",
"from llama_stack_client.types.tool_param_definition_param import ToolParamDefinitionParam\n",
"from llama_stack_client.types import CompletionMessage,ToolResponseMessage\n",
"from datetime import datetime\n",
"from typing import Any, Dict, Optional, TypedDict\n",
"\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.types import CompletionMessage, ToolResponseMessage\n",
"from llama_stack_client.types.tool_param_definition_param import (\n",
" ToolParamDefinitionParam,\n",
")\n",
"\n",
"\n",
"class WeatherTool(CustomTool):\n",
" \"\"\"Example custom tool for weather information.\"\"\"\n",
@@ -305,16 +313,15 @@
" def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]:\n",
" return {\n",
" \"location\": ToolParamDefinitionParam(\n",
" param_type=\"str\",\n",
" description=\"City or location name\",\n",
" required=True\n",
" param_type=\"str\", description=\"City or location name\", required=True\n",
" ),\n",
" \"date\": ToolParamDefinitionParam(\n",
" param_type=\"str\",\n",
" description=\"Optional date (YYYY-MM-DD)\",\n",
" required=False\n",
" )\n",
" required=False,\n",
" ),\n",
" }\n",
"\n",
" async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:\n",
" assert len(messages) == 1, \"Expected single message\"\n",
"\n",
@@ -337,20 +344,14 @@
" )\n",
" return [message]\n",
"\n",
" async def run_impl(self, location: str, date: Optional[str] = None) -> Dict[str, Any]:\n",
" async def run_impl(\n",
" self, location: str, date: Optional[str] = None\n",
" ) -> Dict[str, Any]:\n",
" \"\"\"Simulate getting weather data (replace with actual API call).\"\"\"\n",
" # Mock implementation\n",
" if date:\n",
" return {\n",
" \"temperature\": 90.1,\n",
" \"conditions\": \"sunny\",\n",
" \"humidity\": 40.0\n",
" }\n",
" return {\n",
" \"temperature\": 72.5,\n",
" \"conditions\": \"partly cloudy\",\n",
" \"humidity\": 65.0\n",
" }\n",
" return {\"temperature\": 90.1, \"conditions\": \"sunny\", \"humidity\": 40.0}\n",
" return {\"temperature\": 72.5, \"conditions\": \"partly cloudy\", \"humidity\": 65.0}\n",
"\n",
"\n",
"async def create_weather_agent(client: LlamaStackClient) -> Agent:\n",
@@ -358,38 +359,33 @@
"\n",
" # Create the agent with the tool\n",
" weather_tool = WeatherTool()\n",
" \n",
"\n",
" agent_config = AgentConfig(\n",
" model=LLAMA31_8B_INSTRUCT,\n",
" #model=model_name,\n",
" # model=model_name,\n",
" instructions=\"\"\"\n",
" You are a weather assistant that can provide weather information.\n",
" Always specify the location clearly in your responses.\n",
" Include both temperature and conditions in your summaries.\n",
" \"\"\",\n",
" sampling_params={\n",
" \"strategy\": \"greedy\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.9,\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" weather_tool.get_tool_definition()\n",
" ],\n",
" tools=[weather_tool.get_tool_definition()],\n",
" tool_choice=\"auto\",\n",
" tool_prompt_format=\"json\",\n",
" input_shields=[],\n",
" output_shields=[],\n",
" enable_session_persistence=True\n",
" enable_session_persistence=True,\n",
" )\n",
"\n",
" agent = Agent(\n",
" client=client,\n",
" agent_config=agent_config,\n",
" custom_tools=[weather_tool]\n",
" )\n",
" agent = Agent(client=client, agent_config=agent_config, custom_tools=[weather_tool])\n",
"\n",
" return agent\n",
"\n",
"\n",
"# Example usage\n",
"async def weather_example():\n",
" client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
@@ -413,12 +409,14 @@
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",
"\n",
"# For Jupyter notebooks\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"# Run the example\n",
"await weather_example()"
"await weather_example()\n"
]
},
{