From 1bf40af2abc5fc98692d5fb9bd8ca3948d211ed7 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Wed, 5 Mar 2025 11:18:37 -0800
Subject: [PATCH] update notebook to not use together's endpoint

---
 docs/getting_started.ipynb | 172 ++++++++++++++++++++++++++++++++-----
 1 file changed, 150 insertions(+), 22 deletions(-)

diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb
index 74f1f3677..4ac8ad3a5 100644
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -141,7 +141,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 18,
 "id": "E1UFuJC570Tk",
 "metadata": {
 "colab": {
@@ -326,54 +326,108 @@
 " type: sqlite\n",
 "models:\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ "- metadata: {}\n",
 " model_id: meta-llama/Llama-3.1-8B-Instruct\n",
 " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
 " - llm\n",
 " provider_id: together\n",
 " provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ "- metadata: {}\n",
 " model_id: meta-llama/Llama-3.1-70B-Instruct\n",
 " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
 " - llm\n",
 " provider_id: together\n",
 " provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ "- metadata: {}\n",
 " model_id: meta-llama/Llama-3.1-405B-Instruct-FP8\n",
 " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
 " - llm\n",
 " provider_id: together\n",
 " provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ "- metadata: {}\n",
 " model_id: meta-llama/Llama-3.2-3B-Instruct\n",
 " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
 " - llm\n",
 " provider_id: together\n",
 " provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
+ " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
+ " - llm\n",
+ " provider_id: together\n",
+ " provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
+ "- metadata: {}\n",
 " model_id: meta-llama/Llama-3.2-11B-Vision-Instruct\n",
 " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
 " - llm\n",
 " provider_id: together\n",
 " provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
 "- metadata: {}\n",
+ " model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
+ " model_type: 
!!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n", + "- metadata: {}\n", " model_id: meta-llama/Llama-3.2-90B-Vision-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n", "- metadata: {}\n", + " model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n", + "- metadata: {}\n", " model_id: meta-llama/Llama-3.3-70B-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n", "- metadata: {}\n", + " model_id: meta-llama/Meta-Llama-Guard-3-8B\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Meta-Llama-Guard-3-8B\n", + "- metadata: {}\n", " model_id: meta-llama/Llama-Guard-3-8B\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-Guard-3-8B\n", "- metadata: {}\n", + " model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo\n", + "- metadata: {}\n", " model_id: meta-llama/Llama-Guard-3-11B-Vision\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", @@ -473,6 +527,9 @@ " - config: {}\n", " provider_id: model-context-protocol\n", " provider_type: remote::model-context-protocol\n", + " - config: {}\n", + " provider_id: wolfram-alpha\n", + " provider_type: remote::wolfram-alpha\n", " vector_io:\n", " - config:\n", " kvstore:\n", @@ -504,6 +561,10 @@ " mcp_endpoint: null\n", " provider_id: code-interpreter\n", " toolgroup_id: builtin::code_interpreter\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: wolfram-alpha\n", + " toolgroup_id: builtin::wolfram_alpha\n", "vector_dbs: []\n", "version: '2'\n", "\n", @@ -530,54 +591,108 @@ " type: sqlite\n", "models:\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " 
provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: 
meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n", " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", @@ -677,6 +792,9 @@ " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " provider_id: model-context-protocol\n", " provider_type: remote::model-context-protocol\n", + " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " provider_id: wolfram-alpha\n", + " provider_type: remote::wolfram-alpha\n", " vector_io:\n", " - config:\n", " kvstore:\n", @@ -708,6 +826,10 @@ " mcp_endpoint: null\n", " provider_id: code-interpreter\n", " toolgroup_id: builtin::code_interpreter\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: wolfram-alpha\n", + " toolgroup_id: builtin::wolfram_alpha\n", "vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "version: \u001b[32m'2'\u001b[0m\n", "\n" @@ -4131,7 +4253,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "a2c1e1c2", "metadata": {}, "outputs": [], @@ -4158,28 +4280,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 21, "id": "d7914894", "metadata": {}, "outputs": [ { - "ename": "InternalServerError", - "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m client \u001b[38;5;241m=\u001b[39m LlamaStackClient(\n\u001b[1;32m 2\u001b[0m base_url\u001b[38;5;241m=\u001b[39mLLAMA_STACK_API_TOGETHER_URL,\n\u001b[1;32m 3\u001b[0m )\n\u001b[0;32m----> 5\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrole\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# {\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"type\": \"image\",\u001b[39;49;00m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"image\": {\u001b[39;49;00m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"url\": {\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"uri\": encode_image(\"Llama_Repo.jpeg\")\u001b[39;49;00m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# }\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# }\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHow many different colors are those llamas? 
What are those colors?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.1-8B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28mprint\u001b[39m(response\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;241m.\u001b[39mlower()\u001b[38;5;241m.\u001b[39mstrip())\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/resources/inference.py:291\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_config, tool_prompt_format, tools, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[1;32m 290\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[0;32m--> 291\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 292\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/v1/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_config\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 310\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 311\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mChatCompletionResponse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mChatCompletionResponseStreamChunk\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1225\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1212\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1213\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1220\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1221\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1222\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1223\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1224\u001b[0m )\n\u001b[0;32m-> 1225\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:917\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 915\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 917\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 921\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 922\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 923\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1005\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1004\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1005\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1006\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1007\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1008\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1009\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1010\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1011\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1012\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1054\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1054\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1055\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1056\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1057\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1058\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1059\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1060\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1005\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1004\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1005\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1006\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1007\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1008\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1009\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1010\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1011\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1012\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1014\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1016\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1054\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1054\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1055\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1056\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1057\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1058\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1059\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1060\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/meta-llama/llama-stack-client-python/src/llama_stack_client/_base_client.py:1020\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1017\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1019\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1020\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1022\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1023\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1024\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1028\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1029\u001b[0m )\n", - "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" + "name": "stdout", + "output_type": "stream", + "text": [ + "There are three llamas in the image. The llama in the middle is purple, the llama on the left is white, and the llama on the right is also white, but it is wearing a blue party hat. 
Therefore, there are two different colors of llama in the image: purple and white.\n" ] } ], @@ -4223,11 +4332,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "f9a83275", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33minference> \u001b[0m\u001b[33mThere\u001b[0m\u001b[33m are\u001b[0m\u001b[33m three\u001b[0m\u001b[33m different\u001b[0m\u001b[33m colors\u001b[0m\u001b[33m of\u001b[0m\u001b[33m ll\u001b[0m\u001b[33mamas\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m image\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m first\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m left\u001b[0m\u001b[33m is\u001b[0m\u001b[33m white\u001b[0m\u001b[33m,\u001b[0m\u001b[33m the\u001b[0m\u001b[33m second\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m middle\u001b[0m\u001b[33m is\u001b[0m\u001b[33m purple\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m third\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m right\u001b[0m\u001b[33m is\u001b[0m\u001b[33m white\u001b[0m\u001b[33m with\u001b[0m\u001b[33m a\u001b[0m\u001b[33m blue\u001b[0m\u001b[33m party\u001b[0m\u001b[33m hat\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + } + ], "source": [ + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", "agent_config = AgentConfig(\n", " model=vision_model_id,\n", " instructions=\"You are a helpful assistant\",\n", @@ -4263,6 +4383,14 @@ " log.print()\n", " " ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3352379", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {