diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb
index 74f1f3677..4ac8ad3a5 100644
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -141,7 +141,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 18,
"id": "E1UFuJC570Tk",
"metadata": {
"colab": {
@@ -326,54 +326,108 @@
" type: sqlite\n",
"models:\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.1-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.1-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.1-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.2-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.2-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.2-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-3.3-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-Guard-3-8B\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-Guard-3-8B\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-Guard-3-8B\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-3-8B\n",
"- metadata: {}\n",
+ " model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+ "- metadata: {}\n",
" model_id: meta-llama/Llama-Guard-3-11B-Vision\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
@@ -473,6 +527,9 @@
" - config: {}\n",
" provider_id: model-context-protocol\n",
" provider_type: remote::model-context-protocol\n",
+ " - config: {}\n",
+ " provider_id: wolfram-alpha\n",
+ " provider_type: remote::wolfram-alpha\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
@@ -504,6 +561,10 @@
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
+ "- args: null\n",
+ " mcp_endpoint: null\n",
+ " provider_id: wolfram-alpha\n",
+ " toolgroup_id: builtin::wolfram_alpha\n",
"vector_dbs: []\n",
"version: '2'\n",
"\n",
@@ -530,54 +591,108 @@
" type: sqlite\n",
"models:\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
+ "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
@@ -677,6 +792,9 @@
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: model-context-protocol\n",
" provider_type: remote::model-context-protocol\n",
+ " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
+ " provider_id: wolfram-alpha\n",
+ " provider_type: remote::wolfram-alpha\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
@@ -708,6 +826,10 @@
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
+ "- args: null\n",
+ " mcp_endpoint: null\n",
+ " provider_id: wolfram-alpha\n",
+ " toolgroup_id: builtin::wolfram_alpha\n",
"vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"version: \u001b[32m'2'\u001b[0m\n",
"\n"
@@ -4131,7 +4253,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 20,
"id": "a2c1e1c2",
"metadata": {},
"outputs": [],
@@ -4158,28 +4280,15 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 21,
"id": "d7914894",
"metadata": {},
"outputs": [
{
- "ename": "InternalServerError",
- "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[7], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m client \u001b[38;5;241m=\u001b[39m LlamaStackClient(\n\u001b[1;32m 2\u001b[0m base_url\u001b[38;5;241m=\u001b[39mLLAMA_STACK_API_TOGETHER_URL,\n\u001b[1;32m 3\u001b[0m )\n\u001b[0;32m----> 5\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrole\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# {\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"type\": \"image\",\u001b[39;49;00m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"image\": {\u001b[39;49;00m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"url\": {\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"uri\": encode_image(\"Llama_Repo.jpeg\")\u001b[39;49;00m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# }\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# }\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHow many different colors are those llamas? What are those colors?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.1-8B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28mprint\u001b[39m(response\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;241m.\u001b[39mlower()\u001b[38;5;241m.\u001b[39mstrip())\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/resources/inference.py:291\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_config, tool_prompt_format, tools, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[1;32m 290\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[0;32m--> 291\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 292\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/v1/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_config\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 310\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 311\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mChatCompletionResponse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mChatCompletionResponseStreamChunk\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1225\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1212\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1213\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1220\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1221\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1222\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1223\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1224\u001b[0m )\n\u001b[0;32m-> 1225\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:917\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 915\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 917\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 921\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 922\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 923\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1005\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1004\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1005\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1006\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1007\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1008\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1009\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1010\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1011\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1012\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1054\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1054\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1055\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1056\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1057\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1058\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1059\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1060\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1005\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1004\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1005\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1006\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1007\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1008\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1009\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1010\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1011\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1012\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1054\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1054\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1055\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1056\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1057\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1058\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1059\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1060\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1020\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1017\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1019\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1020\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1022\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1023\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1024\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1028\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1029\u001b[0m )\n",
- "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "There are three llamas in the image. The llama in the middle is purple, the llama on the left is white, and the llama on the right is also white, but it is wearing a blue party hat. Therefore, there are two different colors of llama in the image: purple and white.\n"
]
}
],
@@ -4223,11 +4332,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"id": "f9a83275",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33minference> \u001b[0m\u001b[33mThere\u001b[0m\u001b[33m are\u001b[0m\u001b[33m three\u001b[0m\u001b[33m different\u001b[0m\u001b[33m colors\u001b[0m\u001b[33m of\u001b[0m\u001b[33m ll\u001b[0m\u001b[33mamas\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m image\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m first\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m left\u001b[0m\u001b[33m is\u001b[0m\u001b[33m white\u001b[0m\u001b[33m,\u001b[0m\u001b[33m the\u001b[0m\u001b[33m second\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m middle\u001b[0m\u001b[33m is\u001b[0m\u001b[33m purple\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m third\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m right\u001b[0m\u001b[33m is\u001b[0m\u001b[33m white\u001b[0m\u001b[33m with\u001b[0m\u001b[33m a\u001b[0m\u001b[33m blue\u001b[0m\u001b[33m party\u001b[0m\u001b[33m hat\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
+ "\u001b[30m\u001b[0m"
+ ]
+ }
+ ],
"source": [
+ "from llama_stack_client.types.agent_create_params import AgentConfig\n",
+ "\n",
"agent_config = AgentConfig(\n",
" model=vision_model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
@@ -4263,6 +4383,14 @@
" log.print()\n",
" "
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3352379",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {