diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml index 35cbc4dc3..9fe502254 100644 --- a/.github/workflows/publish-to-test-pypi.yml +++ b/.github/workflows/publish-to-test-pypi.yml @@ -239,5 +239,6 @@ jobs: pip install pytest nbval llama stack build --template together --image-type venv pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb + pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb # TODO: add trigger for integration test workflow & docker builds diff --git a/distributions/dell-tgi/run.yaml b/distributions/dell-tgi/run.yaml index 3f8a98779..cd6ddcfdf 100644 --- a/distributions/dell-tgi/run.yaml +++ b/distributions/dell-tgi/run.yaml @@ -1,6 +1,6 @@ version: '2' image_name: local -docker_image: null +container_image: null conda_env: local apis: - shields diff --git a/distributions/dependencies.json b/distributions/dependencies.json index d6d60ef7c..7b5d8b002 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -13,6 +13,7 @@ "httpx", "huggingface_hub", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -45,6 +46,7 @@ "fire", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -78,6 +80,7 @@ "fire", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -101,14 +104,17 @@ ], "remote-vllm": [ "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -142,6 +148,7 @@ "fireworks-ai", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -176,6 +183,7 @@ "httpx", "huggingface_hub", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -209,6 +217,7 @@ "fire", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -244,6 +253,7 @@ "httpx", "lm-format-enforcer", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -279,6 +289,7 @@ "fire", "httpx", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -315,6 +326,7 @@ "httpx", "lm-format-enforcer", "matplotlib", + "mcp", "nltk", "numpy", "openai", @@ -421,6 +433,7 @@ "httpx", "huggingface_hub", "matplotlib", + "mcp", "nltk", "numpy", "openai", diff --git a/distributions/meta-reference-quantized-gpu/run.yaml b/distributions/meta-reference-quantized-gpu/run.yaml index 19c726b09..eb631adaa 100644 --- a/distributions/meta-reference-quantized-gpu/run.yaml +++ b/distributions/meta-reference-quantized-gpu/run.yaml @@ -1,6 +1,6 @@ version: '2' image_name: local -docker_image: null +container_image: null conda_env: local apis: - shields diff --git a/distributions/vllm-gpu/run.yaml b/distributions/vllm-gpu/run.yaml index f42c942a3..a75a4c451 100644 --- a/distributions/vllm-gpu/run.yaml +++ b/distributions/vllm-gpu/run.yaml @@ -1,6 +1,6 @@ version: '2' image_name: local -docker_image: null +container_image: null conda_env: local apis: - shields diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 921869b33..1db7c0280 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -481,7 +481,7 @@ "- telemetry\n", "conda_env: together\n", "datasets: []\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: []\n", "image_name: together\n", "memory_banks: []\n", @@ -600,7 +600,7 @@ "- telemetry\n", "conda_env: together\n", "datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "image_name: together\n", 
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb index 83891b7ac..a552ce69d 100644 --- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb +++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb @@ -105,6 +105,7 @@ } ], "source": [ + "# NBVAL_SKIP\n", "!pip install -U llama-stack" ] }, @@ -309,12 +310,13 @@ } ], "source": [ + "# NBVAL_SKIP\n", "!llama stack build --template together --image-type venv" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -328,7 +330,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n" + "Not in Google Colab environment\n", + "\u001b[33mWarning: `bwrap` is not available. Code interpreter tool will not work correctly.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/master/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { @@ -356,63 +367,83 @@ "- safety\n", "- scoring\n", "- telemetry\n", - "conda_env: together\n", + "- tool_runtime\n", "datasets: []\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: []\n", "image_name: together\n", "memory_banks: []\n", "metadata_store:\n", - " db_path: /root/.llama/distributions/together/registry.db\n", + " db_path: /Users/xiyan/.llama/distributions/together/registry.db\n", " namespace: null\n", " type: sqlite\n", "models:\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.1-8B-Instruct\n", - " model_type: &id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", - " provider_id: null\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.1-70B-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.1-405B-Instruct-FP8\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.2-3B-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.2-11B-Vision-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n", "- metadata: {}\n", " model_id: meta-llama/Llama-3.2-90B-Vision-Instruct\n", - " 
model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n", "- metadata: {}\n", + " model_id: meta-llama/Llama-3.3-70B-Instruct\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo\n", + "- metadata: {}\n", " model_id: meta-llama/Llama-Guard-3-8B\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-Guard-3-8B\n", "- metadata: {}\n", " model_id: meta-llama/Llama-Guard-3-11B-Vision\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo\n", + "- metadata:\n", + " embedding_dimension: 384\n", + " model_id: all-MiniLM-L6-v2\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - embedding\n", + " provider_id: sentence-transformers\n", + " provider_model_id: null\n", "providers:\n", " agents:\n", " - config:\n", " persistence_store:\n", - " db_path: /root/.llama/distributions/together/agents_store.db\n", + " db_path: /Users/xiyan/.llama/distributions/together/agents_store.db\n", " namespace: null\n", " type: sqlite\n", " provider_id: meta-reference\n", @@ -430,14 +461,17 @@ " provider_type: inline::meta-reference\n", " inference:\n", " - config:\n", - " api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n", + " api_key: '********'\n", " url: https://api.together.xyz/v1\n", " provider_id: together\n", " provider_type: remote::together\n", + " - config: {}\n", + " provider_id: sentence-transformers\n", + " provider_type: inline::sentence-transformers\n", " memory:\n", " - config:\n", " kvstore:\n", - " db_path: /root/.llama/distributions/together/faiss_store.db\n", + " db_path: /Users/xiyan/.llama/distributions/together/faiss_store.db\n", " namespace: null\n", " type: sqlite\n", " provider_id: faiss\n", @@ -454,22 +488,52 @@ " provider_id: llm-as-judge\n", " provider_type: inline::llm-as-judge\n", " - config:\n", - " openai_api_key: ''\n", + " openai_api_key: '********'\n", " provider_id: braintrust\n", " provider_type: inline::braintrust\n", " telemetry:\n", " - config:\n", " service_name: llama-stack\n", " sinks: sqlite\n", - " sqlite_db_path: /root/.llama/distributions/together/trace_store.db\n", + " sqlite_db_path: /Users/xiyan/.llama/distributions/together/trace_store.db\n", " provider_id: meta-reference\n", " provider_type: inline::meta-reference\n", + " tool_runtime:\n", + " - config:\n", + " api_key: '********'\n", + " max_results: 3\n", + " provider_id: brave-search\n", + " provider_type: remote::brave-search\n", + " - config:\n", + " api_key: '********'\n", + " max_results: 3\n", + " provider_id: tavily-search\n", + " provider_type: remote::tavily-search\n", + " - config: {}\n", + " provider_id: code-interpreter\n", + " provider_type: inline::code-interpreter\n", + " - config: {}\n", + " provider_id: memory-runtime\n", + " provider_type: inline::memory-runtime\n", "scoring_fns: []\n", "shields:\n", "- params: null\n", " provider_id: null\n", " 
provider_shield_id: null\n", " shield_id: meta-llama/Llama-Guard-3-8B\n", + "tool_groups:\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: tavily-search\n", + " toolgroup_id: builtin::websearch\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: memory-runtime\n", + " toolgroup_id: builtin::memory\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: code-interpreter\n", + " toolgroup_id: builtin::code_interpreter\n", "version: '2'\n", "\n", "\n" @@ -484,63 +548,83 @@ "- safety\n", "- scoring\n", "- telemetry\n", - "conda_env: together\n", + "- tool_runtime\n", "datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "image_name: together\n", "memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "metadata_store:\n", - " db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n", + " db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n", " namespace: null\n", " type: sqlite\n", "models:\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n", - " model_type: &id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", " - llm\n", - " provider_id: null\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: 
meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", + " provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n", + "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", " model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n", - " model_type: *id001\n", - " provider_id: null\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - llm\n", + " provider_id: together\n", " provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n", + "- metadata:\n", + " embedding_dimension: \u001b[1;36m384\u001b[0m\n", + " model_id: all-MiniLM-L6-v2\n", + " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", + " - embedding\n", + " provider_id: sentence-transformers\n", + " provider_model_id: null\n", "providers:\n", " agents:\n", " - config:\n", " persistence_store:\n", - " db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n", + " db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n", " namespace: null\n", " type: sqlite\n", " provider_id: meta-reference\n", @@ -558,14 +642,17 @@ " provider_type: inline::meta-reference\n", " inference:\n", " - config:\n", - " api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n", + " api_key: \u001b[32m'********'\u001b[0m\n", " url: \u001b[4;94mhttps://api.together.xyz/v1\u001b[0m\n", " provider_id: together\n", " provider_type: remote::together\n", + " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " provider_id: sentence-transformers\n", + " provider_type: inline::sentence-transformers\n", " memory:\n", " - config:\n", " kvstore:\n", - " db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n", + " db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n", " namespace: null\n", " type: sqlite\n", " provider_id: faiss\n", @@ -582,58 +669,74 @@ " provider_id: llm-as-judge\n", " provider_type: inline::llm-as-judge\n", " - config:\n", - " openai_api_key: \u001b[32m''\u001b[0m\n", + " openai_api_key: \u001b[32m'********'\u001b[0m\n", " provider_id: braintrust\n", " provider_type: inlin\u001b[1;92me::b\u001b[0mraintrust\n", " telemetry:\n", " - config:\n", " service_name: llama-stack\n", " sinks: sqlite\n", - " sqlite_db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n", + " sqlite_db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n", " provider_id: meta-reference\n", " provider_type: inline::meta-reference\n", + " tool_runtime:\n", + " - config:\n", + " api_key: \u001b[32m'********'\u001b[0m\n", + " max_results: \u001b[1;36m3\u001b[0m\n", + " provider_id: brave-search\n", + " 
provider_type: remot\u001b[1;92me::b\u001b[0mrave-search\n", + " - config:\n", + " api_key: \u001b[32m'********'\u001b[0m\n", + " max_results: \u001b[1;36m3\u001b[0m\n", + " provider_id: tavily-search\n", + " provider_type: remote::tavily-search\n", + " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " provider_id: code-interpreter\n", + " provider_type: inlin\u001b[1;92me::c\u001b[0mode-interpreter\n", + " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + " provider_id: memory-runtime\n", + " provider_type: inline::memory-runtime\n", "scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "shields:\n", "- params: null\n", " provider_id: null\n", " provider_shield_id: null\n", " shield_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n", + "tool_groups:\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: tavily-search\n", + " toolgroup_id: builtin::websearch\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: memory-runtime\n", + " toolgroup_id: builtin::memory\n", + "- args: null\n", + " mcp_endpoint: null\n", + " provider_id: code-interpreter\n", + " toolgroup_id: builtin::code_interpreter\n", "version: \u001b[32m'2'\u001b[0m\n", "\n" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "Model(identifier='meta-llama/Llama-3.1-405B-Instruct', metadata={}, provider_id='together', provider_resource_id='meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo', type='model', model_type='llm')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "import os\n", "\n", - "from google.colab import userdata\n", - "\n", - "os.environ[\"TOGETHER_API_KEY\"] = userdata.get(\"TOGETHER_API_KEY\")\n", + "try:\n", + " from google.colab import userdata\n", + " os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n", + " os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n", + "except ImportError:\n", + " print(\"Not in Google Colab environment\")\n", "\n", "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n", "\n", "client = LlamaStackAsLibraryClient(\"together\")\n", - "_ = client.initialize()\n", - "\n", - "# register 405B as LLM Judge model\n", - "client.models.register(\n", - " model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n", - " provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n", - " provider_id=\"together\",\n", - ")\n" + "_ = client.initialize()" ] }, { @@ -662,7 +765,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "TC_IwIAQo4q-" }, @@ -670,12 +773,12 @@ "source": [ "name = \"llamastack/mmmu\"\n", "subset = \"Agriculture\"\n", - "split = \"dev\"\n" + "split = \"dev\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -788,130 +891,13 @@ }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "feb82e061ee44283b4a46be858ef4cd7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "README.md: 0%| | 0.00/36.0k [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c788d4e9e1e24dca9b6503689df9b631", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "dev-00000-of-00001.parquet: 0%| | 0.00/29.5M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - 
{ - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "44f585990aa244d8ba61f892dc1ccc1c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "validation-00000-of-00001.parquet: 0%| | 0.00/165M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4fed5720f30b4b3cbbc606a4f25e223b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "test-00000-of-00003.parquet: 0%| | 0.00/461M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9659140487ca4d3ea799196d2c1ecf61", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "test-00001-of-00003.parquet: 0%| | 0.00/454M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c06f9a090fb54c74b947634bf6d11fa8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "test-00002-of-00003.parquet: 0%| | 0.00/471M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4f788a7920c346f3b42900825bd6711a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating dev split: 0%| | 0/5 [00:00, ? examples/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9d2b6eabf7e14436b72bbf374b4a2a0a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating validation split: 0%| | 0/30 [00:00, ? examples/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24e48376a72940679989a39a40bbe7f6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating test split: 0%| | 0/287 [00:00, ? examples/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating dev split: 100%|██████████| 5/5 [00:00<00:00, 139.81 examples/s]\n", + "Generating validation split: 100%|██████████| 30/30 [00:00<00:00, 258.29 examples/s]\n", + "Generating test split: 100%|██████████| 287/287 [00:01<00:00, 197.69 examples/s]\n" + ] } ], "source": [ @@ -936,7 +922,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -951,7 +937,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 5/5 [00:51<00:00, 10.28s/it]\n" + "100%|██████████| 5/5 [00:42<00:00, 8.60s/it]\n" ] }, { @@ -959,25 +945,21 @@ "text/html": [ "
EvaluateResponse(\n", "│ generations=[\n", + "│ │ {'generated_answer': 'Answer: D'},\n", "│ │ {\n", - "│ │ │ 'generated_answer': 'The Colorado potato beetle (Leptinotarsa decemlineata) is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B) The one with black coloured antennae'\n", + "│ │ │ 'generated_answer': 'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B) Two pathogens'\n", "│ │ },\n", "│ │ {\n", - "│ │ │ 'generated_answer': 'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B) Two pathogens'\n", + "│ │ │ 'generated_answer': \"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA) Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB) Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC) Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD) Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE) Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. 
The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"\n", "│ │ },\n", + "│ │ {'generated_answer': 'Answer: D'},\n", "│ │ {\n", - "│ │ │ 'generated_answer': 'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E) Fungal gummosis'\n", - "│ │ },\n", - "│ │ {\n", - "│ │ │ 'generated_answer': 'The correct answer is D) Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'\n", - "│ │ },\n", - "│ │ {\n", - "│ │ │ 'generated_answer': \"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems (A). Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection (B). Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage (D). Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria (E). Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. 
Therefore, the correct answer is:\\n\\nAnswer: C\"\n", + "│ │ │ 'generated_answer': '**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A) Physiological problems'\n", "│ │ }\n", "│ ],\n", "│ scores={\n", "│ │ 'basic::regex_parser_multiple_choice_answer': ScoringResult(\n", - "│ │ │ aggregated_results={'accuracy': 0.2, 'num_correct': 1.0, 'num_total': 5.0},\n", + "│ │ │ aggregated_results={'accuracy': {'accuracy': 0.2, 'num_correct': 1.0, 'num_total': 5}},\n", "│ │ │ score_rows=[{'score': 0.0}, {'score': 0.0}, {'score': 0.0}, {'score': 1.0}, {'score': 0.0}]\n", "│ │ )\n", "│ }\n", @@ -987,25 +969,21 @@ "text/plain": [ "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Colorado potato beetle \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLeptinotarsa decemlineata\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m The one with black coloured antennae'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. 
This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. 
However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The correct answer is D\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection \u001b[0m\u001b[32m(\u001b[0m\u001b[32mB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage \u001b[0m\u001b[32m(\u001b[0m\u001b[32mD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria \u001b[0m\u001b[32m(\u001b[0m\u001b[32mE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. 
Therefore, the correct answer is:\\n\\nAnswer: C\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological problems'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'basic::regex_parser_multiple_choice_answer'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5.0\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", @@ -1056,7 +1034,9 @@ " \"model\": \"meta-llama/Llama-3.2-90B-Vision-Instruct\",\n", " \"sampling_params\": {\n", " \"strategy\": {\n", - " \"type\": \"greedy\",\n", + " \"type\": \"top_p\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.95,\n", " },\n", " \"max_tokens\": 4096,\n", " \"repeat_penalty\": 1.0,\n", @@ -1081,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "id": "HXmZf3Ymw-aX" }, @@ -1108,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "id": "Gc8azb4Rxr5J" }, @@ -1122,7 +1102,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1136,7 +1116,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 5/5 [00:48<00:00, 9.68s/it]\n" + " 0%| | 0/5 [00:00, ?it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5/5 [00:31<00:00, 6.38s/it]\n" ] }, { @@ -1144,29 +1131,25 @@ "text/html": [ "EvaluateResponse(\n", "│ 
generations=[\n", - "│ │ {'generated_answer': 'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'},\n", + "│ │ {'generated_answer': \"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"},\n", + "│ │ {'generated_answer': \"I'm not aware of the information about the 2018 Jerlov Award recipient.\"},\n", "│ │ {\n", - "│ │ │ 'generated_answer': \"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"\n", + "│ │ │ 'generated_answer': \"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\n", "│ │ },\n", + "│ │ {'generated_answer': 'I do not have information on the Leipzig 1877 tournament.'},\n", "│ │ {\n", - "│ │ │ 'generated_answer': \"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"\n", - "│ │ },\n", - "│ │ {\n", - "│ │ │ 'generated_answer': \"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany (the Leipzig Chess Club's) founding and of the 50th anniversary of Paul Morphy's birth\"\n", - "│ │ },\n", - "│ │ {\n", - "│ │ │ 'generated_answer': \"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"\n", + "│ │ │ 'generated_answer': \"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"\n", "│ │ }\n", "│ ],\n", "│ scores={\n", "│ │ 'llm-as-judge::405b-simpleqa': ScoringResult(\n", "│ │ │ aggregated_results={},\n", "│ │ │ score_rows=[\n", - "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'},\n", + "│ │ │ │ {'score': 'C', 'judge_feedback': 'C'},\n", "│ │ │ │ {'score': 'C', 'judge_feedback': 'C'},\n", "│ │ │ │ {'score': 'A', 'judge_feedback': 'A'},\n", - "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'},\n", - "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'}\n", + "│ │ │ │ {'score': 'C', 'judge_feedback': 'C'},\n", + "│ │ │ │ {'score': 'C', 'judge_feedback': 'C'}\n", "│ │ │ ]\n", "│ │ )\n", "│ }\n", @@ -1176,29 +1159,25 @@ "text/plain": [ "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not aware of the information about the 2018 Jerlov Award recipient.\"\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. 
However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'I do not have information on the Leipzig 1877 tournament.'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany \u001b[0m\u001b[32m(\u001b[0m\u001b[32mthe Leipzig Chess Club's\u001b[0m\u001b[32m)\u001b[0m\u001b[32m founding and of the 50th anniversary of Paul Morphy's birth\"\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n", + 
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", @@ -1210,6 +1189,13 @@ } ], "source": [ + "# register 405B as LLM Judge model\n", + "client.models.register(\n", + " model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n", + " provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n", + " provider_id=\"together\",\n", + ")\n", + "\n", "client.eval_tasks.register(\n", " eval_task_id=\"meta-reference::simpleqa\",\n", " dataset_id=simpleqa_dataset_id,\n", @@ -1257,7 +1243,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1271,7 +1257,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "5it [00:26, 5.29s/it]\n" + "5it [00:06, 1.33s/it]\n" ] }, { @@ -1280,27 +1266,25 @@ "EvaluateResponse(\n", "│ generations=[\n", "│ │ {\n", - "│ │ │ 'generated_answer': \"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"\n", + "│ │ │ 'generated_answer': 'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'\n", "│ │ },\n", "│ │ {\n", - "│ │ │ 'generated_answer': \"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"\n", + "│ │ │ 'generated_answer': 'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'\n", "│ │ },\n", "│ │ {\n", - "│ │ │ 'generated_answer': \"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\n", + "│ │ │ 'generated_answer': \"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. 
The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"\n", "│ │ },\n", + "│ │ {'generated_answer': 'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'},\n", "│ │ {\n", - "│ │ │ 'generated_answer': 'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'\n", - "│ │ },\n", - "│ │ {\n", - "│ │ │ 'generated_answer': \"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"\n", + "│ │ │ 'generated_answer': \"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"\n", "│ │ }\n", "│ ],\n", "│ scores={\n", "│ │ 'llm-as-judge::405b-simpleqa': ScoringResult(\n", "│ │ │ aggregated_results={},\n", "│ │ │ score_rows=[\n", - "│ │ │ │ {'score': 'C', 'judge_feedback': 'C.'},\n", - "│ │ │ │ {'score': 'C', 'judge_feedback': 'C'},\n", + "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'},\n", + "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'},\n", "│ │ │ │ {'score': 'A', 'judge_feedback': 'A'},\n", "│ │ │ │ {'score': 'A', 'judge_feedback': 'A'},\n", "│ │ │ │ {'score': 'B', 'judge_feedback': 'B'}\n", @@ -1314,27 +1298,25 @@ "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. 
The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C.'\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n", @@ -1350,15 +1332,17 @@ ], "source": [ "agent_config = {\n", - " \"model\": \"meta-llama/Llama-3.1-405B-Instruct\",\n", - " \"instructions\": \"You are a helpful assistant\",\n", - " \"sampling_params\": {\"strategy\": {\"type\": \"greedy\"}},\n", - " \"tools\": [\n", - " {\n", - " \"type\": \"brave_search\",\n", - " \"engine\": \"tavily\",\n", - " \"api_key\": userdata.get(\"TAVILY_SEARCH_API_KEY\"),\n", + " \"model\": \"meta-llama/Llama-3.3-70B-Instruct\",\n", + " \"instructions\": \"You are a helpful assistant that have access to tool to search the web. 
\",\n", + " \"sampling_params\": {\n", + " \"strategy\": {\n", + " \"type\": \"top_p\",\n", + " \"temperature\": 0.5,\n", + " \"top_p\": 0.9,\n", " }\n", + " },\n", + " \"toolgroups\": [\n", + " \"builtin::websearch\",\n", " ],\n", " \"tool_choice\": \"auto\",\n", " \"tool_prompt_format\": \"json\",\n", @@ -1381,6 +1365,13 @@ ")\n", "pprint(response)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1396,7 +1387,16 @@ "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.16" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb b/docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb index bed1aa2a8..df8995fd4 100644 --- a/docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb +++ b/docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb @@ -760,7 +760,7 @@ "- tool_runtime\n", "conda_env: together\n", "datasets: []\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: []\n", "image_name: together\n", "memory_banks: []\n", @@ -942,7 +942,7 @@ "- tool_runtime\n", "conda_env: together\n", "datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "docker_image: null\n", + "container_image: null\n", "eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "image_name: together\n", "memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 38cabdd3e..459a53888 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -6067,6 +6067,76 @@ "step" ] }, + "AgentTurnInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent_turn_input", + "default": "agent_turn_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, "Dataset": { "type": "object", "properties": { @@ -6130,150 +6200,110 @@ "metadata" ] }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + 
"ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, "ParamType": { "oneOf": [ { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "string", - "default": "string" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/StringType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "number", - "default": "number" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/NumberType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "boolean", - "default": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/BooleanType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "array", - "default": "array" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/ArrayType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "object", - "default": "object" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/ObjectType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "json", - "default": "json" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/JsonType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "union", - "default": "union" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/UnionType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "chat_completion_input", - "default": "chat_completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/ChatCompletionInputType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "completion_input", - "default": "completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/CompletionInputType" }, { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent_turn_input", - "default": "agent_turn_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] + "$ref": "#/components/schemas/AgentTurnInputType" } ] }, + "StringType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "string", + "default": "string" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, "EvalTask": { "type": "object", "properties": { @@ -8922,6 +8952,10 @@ "name": "AgentTool", "description": "" }, + { + "name": "AgentTurnInputType", + "description": " " + }, { "name": "AgentTurnResponseEvent", "description": "Streamed agent execution response.\n\n " @@ -8965,6 +8999,10 @@ "name": "AppendRowsRequest", "description": " " }, + { + "name": "ArrayType", + "description": " " + }, { "name": "BasicScoringFnParams", "description": " " @@ -8992,6 +9030,10 @@ "name": 
"BenchmarkEvalTaskConfig", "description": " " }, + { + "name": "BooleanType", + "description": " " + }, { "name": "BuiltinTool", "description": " " @@ -9000,6 +9042,10 @@ "name": "CancelTrainingJobRequest", "description": " " }, + { + "name": "ChatCompletionInputType", + "description": " " + }, { "name": "ChatCompletionRequest", "description": " " @@ -9024,6 +9070,10 @@ "name": "Checkpoint", "description": "Checkpoint created during training runs\n\n " }, + { + "name": "CompletionInputType", + "description": " " + }, { "name": "CompletionMessage", "description": " " @@ -9166,6 +9216,10 @@ "name": "JobStatus", "description": " " }, + { + "name": "JsonType", + "description": " " + }, { "name": "KeyValueMemoryBank", "description": " " @@ -9283,6 +9337,14 @@ { "name": "Models" }, + { + "name": "NumberType", + "description": " " + }, + { + "name": "ObjectType", + "description": " " + }, { "name": "OptimizerConfig", "description": " " @@ -9490,6 +9552,10 @@ "name": "StopReason", "description": " " }, + { + "name": "StringType", + "description": " " + }, { "name": "StructuredLogEvent", "description": " " @@ -9622,6 +9688,10 @@ "name": "URL", "description": " " }, + { + "name": "UnionType", + "description": " " + }, { "name": "UnstructuredLogEvent", "description": " " @@ -9682,6 +9752,7 @@ "AgentSessionCreateResponse", "AgentStepResponse", "AgentTool", + "AgentTurnInputType", "AgentTurnResponseEvent", "AgentTurnResponseStepCompletePayload", "AgentTurnResponseStepProgressPayload", @@ -9692,20 +9763,24 @@ "AggregationFunctionType", "AppEvalTaskConfig", "AppendRowsRequest", + "ArrayType", "BasicScoringFnParams", "BatchChatCompletionRequest", "BatchChatCompletionResponse", "BatchCompletionRequest", "BatchCompletionResponse", "BenchmarkEvalTaskConfig", + "BooleanType", "BuiltinTool", "CancelTrainingJobRequest", + "ChatCompletionInputType", "ChatCompletionRequest", "ChatCompletionResponse", "ChatCompletionResponseEvent", "ChatCompletionResponseEventType", "ChatCompletionResponseStreamChunk", "Checkpoint", + "CompletionInputType", "CompletionMessage", "CompletionRequest", "CompletionResponse", @@ -9737,6 +9812,7 @@ "InvokeToolRequest", "Job", "JobStatus", + "JsonType", "KeyValueMemoryBank", "KeyValueMemoryBankParams", "KeywordMemoryBank", @@ -9764,6 +9840,8 @@ "Model", "ModelCandidate", "ModelType", + "NumberType", + "ObjectType", "OptimizerConfig", "OptimizerType", "PaginatedRowsResult", @@ -9812,6 +9890,7 @@ "SpanStatus", "SpanWithStatus", "StopReason", + "StringType", "StructuredLogEvent", "SupervisedFineTuneRequest", "SyntheticDataGenerateRequest", @@ -9842,6 +9921,7 @@ "TrainingConfig", "Turn", "URL", + "UnionType", "UnstructuredLogEvent", "UserMessage", "VectorMemoryBank", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 75bc25e94..9aeac6db3 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -105,6 +105,16 @@ components: - name - args type: object + AgentTurnInputType: + additionalProperties: false + properties: + type: + const: agent_turn_input + default: agent_turn_input + type: string + required: + - type + type: object AgentTurnResponseEvent: additionalProperties: false properties: @@ -290,6 +300,16 @@ components: - dataset_id - rows type: object + ArrayType: + additionalProperties: false + properties: + type: + const: array + default: array + type: string + required: + - type + type: object BasicScoringFnParams: additionalProperties: false properties: @@ -395,6 +415,16 @@ components: - type - 
eval_candidate type: object + BooleanType: + additionalProperties: false + properties: + type: + const: boolean + default: boolean + type: string + required: + - type + type: object BuiltinTool: enum: - brave_search @@ -410,6 +440,16 @@ components: required: - job_uuid type: object + ChatCompletionInputType: + additionalProperties: false + properties: + type: + const: chat_completion_input + default: chat_completion_input + type: string + required: + - type + type: object ChatCompletionRequest: additionalProperties: false properties: @@ -492,6 +532,16 @@ components: type: object Checkpoint: description: Checkpoint created during training runs + CompletionInputType: + additionalProperties: false + properties: + type: + const: completion_input + default: completion_input + type: string + required: + - type + type: object CompletionMessage: additionalProperties: false properties: @@ -1007,6 +1057,16 @@ components: - failed - scheduled type: string + JsonType: + additionalProperties: false + properties: + type: + const: json + default: json + type: string + required: + - type + type: object KeyValueMemoryBank: additionalProperties: false properties: @@ -1440,6 +1500,26 @@ components: - llm - embedding type: string + NumberType: + additionalProperties: false + properties: + type: + const: number + default: number + type: string + required: + - type + type: object + ObjectType: + additionalProperties: false + properties: + type: + const: object + default: object + type: string + required: + - type + type: object OptimizerConfig: additionalProperties: false properties: @@ -1488,96 +1568,16 @@ components: type: object ParamType: oneOf: - - additionalProperties: false - properties: - type: - const: string - default: string - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: number - default: number - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: boolean - default: boolean - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: array - default: array - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: object - default: object - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: json - default: json - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: union - default: union - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: chat_completion_input - default: chat_completion_input - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: completion_input - default: completion_input - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: agent_turn_input - default: agent_turn_input - type: string - required: - - type - type: object + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: 
'#/components/schemas/AgentTurnInputType' PostTrainingJob: additionalProperties: false properties: @@ -2479,6 +2479,16 @@ components: - end_of_message - out_of_tokens type: string + StringType: + additionalProperties: false + properties: + type: + const: string + default: string + type: string + required: + - type + type: object StructuredLogEvent: additionalProperties: false properties: @@ -3131,6 +3141,16 @@ components: required: - uri type: object + UnionType: + additionalProperties: false + properties: + type: + const: union + default: union + type: string + required: + - type + type: object UnstructuredLogEvent: additionalProperties: false properties: @@ -5588,6 +5608,9 @@ tags: name: AgentStepResponse - description: name: AgentTool +- description: + name: AgentTurnInputType - description: 'Streamed agent execution response. @@ -5624,6 +5647,8 @@ tags: - description: name: AppendRowsRequest +- description: + name: ArrayType - description: name: BasicScoringFnParams @@ -5643,11 +5668,16 @@ tags: - description: name: BenchmarkEvalTaskConfig +- description: + name: BooleanType - description: name: BuiltinTool - description: name: CancelTrainingJobRequest +- description: + name: ChatCompletionInputType - description: name: ChatCompletionRequest @@ -5676,6 +5706,9 @@ tags: ' name: Checkpoint +- description: + name: CompletionInputType - description: name: CompletionMessage @@ -5770,6 +5803,8 @@ tags: name: Job - description: name: JobStatus +- description: + name: JsonType - description: name: KeyValueMemoryBank @@ -5847,6 +5882,10 @@ tags: - description: name: ModelType - name: Models +- description: + name: NumberType +- description: + name: ObjectType - description: name: OptimizerConfig @@ -5988,6 +6027,8 @@ tags: name: SpanWithStatus - description: name: StopReason +- description: + name: StringType - description: name: StructuredLogEvent @@ -6080,6 +6121,8 @@ tags: name: Turn - description: name: URL +- description: + name: UnionType - description: name: UnstructuredLogEvent @@ -6126,6 +6169,7 @@ x-tagGroups: - AgentSessionCreateResponse - AgentStepResponse - AgentTool + - AgentTurnInputType - AgentTurnResponseEvent - AgentTurnResponseStepCompletePayload - AgentTurnResponseStepProgressPayload @@ -6136,20 +6180,24 @@ x-tagGroups: - AggregationFunctionType - AppEvalTaskConfig - AppendRowsRequest + - ArrayType - BasicScoringFnParams - BatchChatCompletionRequest - BatchChatCompletionResponse - BatchCompletionRequest - BatchCompletionResponse - BenchmarkEvalTaskConfig + - BooleanType - BuiltinTool - CancelTrainingJobRequest + - ChatCompletionInputType - ChatCompletionRequest - ChatCompletionResponse - ChatCompletionResponseEvent - ChatCompletionResponseEventType - ChatCompletionResponseStreamChunk - Checkpoint + - CompletionInputType - CompletionMessage - CompletionRequest - CompletionResponse @@ -6181,6 +6229,7 @@ x-tagGroups: - InvokeToolRequest - Job - JobStatus + - JsonType - KeyValueMemoryBank - KeyValueMemoryBankParams - KeywordMemoryBank @@ -6208,6 +6257,8 @@ x-tagGroups: - Model - ModelCandidate - ModelType + - NumberType + - ObjectType - OptimizerConfig - OptimizerType - PaginatedRowsResult @@ -6256,6 +6307,7 @@ x-tagGroups: - SpanStatus - SpanWithStatus - StopReason + - StringType - StructuredLogEvent - SupervisedFineTuneRequest - SyntheticDataGenerateRequest @@ -6286,6 +6338,7 @@ x-tagGroups: - TrainingConfig - Turn - URL + - UnionType - UnstructuredLogEvent - UserMessage - VectorMemoryBank diff --git a/docs/source/distributions/building_distro.md 
b/docs/source/distributions/building_distro.md index aaf2462f7..83069aa05 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -17,13 +17,13 @@ pip install -e . llama stack build -h ``` -We will start build our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify: +We will start building our distribution (in the form of a Conda environment, or Container image). In this step, we will specify: - `name`: the name for our distribution (e.g. `my-stack`) -- `image_type`: our build image type (`conda | docker`) +- `image_type`: our build image type (`conda | container`) - `distribution_spec`: our distribution specs for specifying API providers - `description`: a short description of the configurations for the distribution - `providers`: specifies the underlying implementation for serving each API endpoint - - `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of Docker image or Conda environment. + - `image_type`: `conda` | `container` to specify whether to build the distribution in the form of a Container image or Conda environment. After this step is complete, a file named ` -build.yaml` and template file ` -run.yaml` will be generated and saved at the output file path specified at the end of the command. @@ -35,7 +35,7 @@ After this step is complete, a file named ` -build.yaml` and template file llama stack build > Enter a name for your Llama Stack (e.g. my-local-stack): my-stack -> Enter the image type you want your Llama Stack to be built as (docker or conda): conda +> Enter the image type you want your Llama Stack to be built as (container or conda): conda Llama Stack is composed of several APIs working together. Let's select the provider types (implementations) you want to use for these APIs. @@ -348,26 +348,26 @@ llama stack build --config llama_stack/templates/ollama/build.yaml ``` ::: -:::{tab-item} Building Docker +:::{tab-item} Building Container > [!TIP] -> Podman is supported as an alternative to Docker. Set `DOCKER_BINARY` to `podman` in your environment to use Podman. +> Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman. -To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type. +To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type. ``` -llama stack build --template ollama --image-type docker +llama stack build --template ollama --image-type container ``` ``` -$ llama stack build --template ollama --image-type docker +$ llama stack build --template ollama --image-type container ... -Dockerfile created successfully in /tmp/tmp.viA3a3Rdsg/DockerfileFROM python:3.10-slim +Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim ... You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml` ``` -After this step is successful, you should be able to find the built docker image and test it with `llama stack run `. +After this step is successful, you should be able to find the built container image and test it with `llama stack run `.
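For readers following the build flow above: the CLI options map onto a small pydantic data model that this diff renames from `docker_image` to `container_image`. The sketch below is a simplified restatement of the fields visible in this change set (`ImageType`, `DistributionSpec.container_image`, `BuildConfig.image_type`), not the full definitions, which live in `llama_stack/distribution/datatypes.py` and `llama_stack/distribution/build.py`; the Ollama provider string and description are illustrative values only.

```python
# Simplified sketch of the build-config data model after the docker -> container
# rename in this diff. Field sets are abridged for illustration.
from enum import Enum
from typing import Dict, List, Optional, Union

from pydantic import BaseModel, Field


class ImageType(Enum):
    container = "container"  # formerly "docker"
    conda = "conda"
    venv = "venv"


class DistributionSpec(BaseModel):
    description: str = ""
    container_image: Optional[str] = None  # formerly docker_image
    providers: Dict[str, Union[str, List[str]]] = Field(default_factory=dict)


class BuildConfig(BaseModel):
    distribution_spec: DistributionSpec
    image_type: str = "conda"  # conda | container | venv


# Roughly what `llama stack build --template ollama --image-type container`
# serializes into a build config (provider list shortened for the example).
build = BuildConfig(
    distribution_spec=DistributionSpec(
        description="Use Ollama for running LLM inference",
        providers={"inference": ["remote::ollama"]},
    ),
    image_type=ImageType.container.value,
)
print(build.model_dump())
```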
::: :::: diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 7e3446863..4028ed384 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -12,7 +12,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | ### Environment Variables diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 71adfad09..dd4e51264 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -19,7 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro | safety | `remote::bedrock` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index 335309729..7ed174984 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -22,7 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | ### Environment Variables diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index a89719dea..269354e98 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -22,7 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs. 
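The `remote::model-context-protocol` entry added to these provider tables is the tool_runtime provider that can expose MCP servers as toolgroups. The snippet below is a hypothetical usage sketch, not part of this diff: the `toolgroups.register` call and its parameters, the endpoint URI, the server port, and the `mcp::example` toolgroup id are all assumptions made for illustration.

```python
# Hypothetical sketch (not part of this diff): exposing an MCP server through the
# new remote::model-context-protocol tool_runtime provider and referencing it
# from an agent configuration. Method name, parameters, port, and toolgroup id
# are assumptions for illustration.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # port is illustrative

# Register the MCP server's tools as a toolgroup served by the new provider.
client.toolgroups.register(
    toolgroup_id="mcp::example",                         # made-up id
    provider_id="model-context-protocol",
    mcp_endpoint={"uri": "http://localhost:8000/sse"},   # assumed SSE endpoint
)

# Agents can then list it alongside built-in toolgroups such as
# builtin::websearch, which the benchmark notebook above switches to.
agent_config = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "instructions": "You are a helpful assistant.",
    "toolgroups": ["builtin::websearch", "mcp::example"],
    "tool_choice": "auto",
}
```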
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 26ed5d05b..937dbbdbd 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -22,7 +22,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc. diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index 98d02725c..2bb5329b9 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -14,11 +14,14 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | | inference | `remote::vllm` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md index f4f705b12..0fd6a693c 100644 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ b/docs/source/distributions/self_hosted_distro/tgi.md @@ -23,7 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference. 
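The per-API provider tables in these distribution docs can also be recovered at runtime. As the Streamlit UI change later in this diff shows, `client.providers.list()` now returns a flat list of entries carrying `api` and `provider_id` attributes, so callers group them by API themselves. A minimal sketch, with an illustrative base URL:

```python
# Rebuild the per-API provider listing shown in these docs from a running stack,
# mirroring the updated providers page in llama_stack/distribution/ui.
from collections import defaultdict

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # base URL is illustrative

api_to_providers = defaultdict(list)
for provider in client.providers.list():  # flat list; each entry has .api and .provider_id
    api_to_providers[provider.api].append(provider)

for api, providers in sorted(api_to_providers.items()):
    print(f"{api}: {', '.join(p.provider_id for p in providers)}")
```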
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 3b476c9bf..e990e273f 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -22,7 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` | ### Environment Variables diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index d7c3fe9e5..602b5a635 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -1,6 +1,6 @@ # Quick Start -In this guide, we'll through how you can use the Llama Stack client SDK to build a simple RAG agent. +In this guide, we'll walk through how you can use the Llama Stack client SDK to build a simple RAG agent. The most critical requirement for running the agent is running inference on the underlying Llama model. Depending on what hardware (GPUs) you have available, you have various options. We will use `Ollama` for this purpose as it is the easiest to get started with and yet robust. diff --git a/llama_stack/apis/common/type_system.py b/llama_stack/apis/common/type_system.py index e76cfde13..fa9c5e92e 100644 --- a/llama_stack/apis/common/type_system.py +++ b/llama_stack/apis/common/type_system.py @@ -6,54 +6,65 @@ from typing import Literal, Union -from llama_models.schema_utils import register_schema +from llama_models.schema_utils import json_schema_type, register_schema from pydantic import BaseModel, Field from typing_extensions import Annotated +@json_schema_type class StringType(BaseModel): type: Literal["string"] = "string" +@json_schema_type class NumberType(BaseModel): type: Literal["number"] = "number" +@json_schema_type class BooleanType(BaseModel): type: Literal["boolean"] = "boolean" +@json_schema_type class ArrayType(BaseModel): type: Literal["array"] = "array" +@json_schema_type class ObjectType(BaseModel): type: Literal["object"] = "object" +@json_schema_type class JsonType(BaseModel): type: Literal["json"] = "json" +@json_schema_type class UnionType(BaseModel): type: Literal["union"] = "union" +@json_schema_type class ChatCompletionInputType(BaseModel): # expects List[Message] for messages type: Literal["chat_completion_input"] = "chat_completion_input" +@json_schema_type class CompletionInputType(BaseModel): # expects InterleavedTextMedia for content type: Literal["completion_input"] = "completion_input" +@json_schema_type class AgentTurnInputType(BaseModel): # expects List[Message] for messages (may also include attachments?) 
type: Literal["agent_turn_input"] = "agent_turn_input" +@json_schema_type class DialogType(BaseModel): # expects List[Message] for messages # this type semantically contains the output label whereas ChatCompletionInputType does not diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 08c987a50..16ca670f7 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -182,8 +182,8 @@ def _generate_run_config( """ apis = list(build_config.distribution_spec.providers.keys()) run_config = StackRunConfig( - docker_image=( - image_name if build_config.image_type == ImageType.docker.value else None + container_image=( + image_name if build_config.image_type == ImageType.container.value else None ), image_name=image_name, apis=apis, @@ -238,7 +238,7 @@ def _run_stack_build_command_from_build_config( image_name: Optional[str] = None, template_name: Optional[str] = None, ) -> None: - if build_config.image_type == ImageType.docker.value: + if build_config.image_type == ImageType.container.value: if template_name: image_name = f"distribution-{template_name}" else: diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index d00157710..48c811839 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -47,8 +47,8 @@ class StackBuild(Subcommand): self.parser.add_argument( "--image-type", type=str, - help="Image Type to use for the build. This can be either conda or docker. If not specified, will use the image type from the template config.", - choices=["conda", "docker", "venv"], + help="Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config.", + choices=["conda", "container", "venv"], default="conda", ) diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index 11d3f705a..56f4feceb 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -27,7 +27,7 @@ class StackConfigure(Subcommand): self.parser.add_argument( "config", type=str, - help="Path to the build config file (e.g. ~/.llama/builds/ / -build.yaml). For docker, this could also be the name of the docker image. ", + help="Path to the build config file (e.g. ~/.llama/builds/ / -build.yaml). For container, this could also be the name of the container image. 
", ) self.parser.add_argument( diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 9fa82bd61..e1e02d10c 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -92,9 +92,9 @@ class StackRun(Subcommand): ) if not config_file.exists() and not has_yaml_suffix: - # check if it's a build config saved to docker dir + # check if it's a build config saved to container dir config_file = Path( - BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml" + BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml" ) if not config_file.exists() and not has_yaml_suffix: @@ -115,12 +115,12 @@ class StackRun(Subcommand): config_dict = yaml.safe_load(config_file.read_text()) config = parse_and_maybe_upgrade_config(config_dict) - if config.docker_image: + if config.container_image: script = ( importlib.resources.files("llama_stack") / "distribution/start_container.sh" ) - run_args = [script, config.docker_image] + run_args = [script, config.container_image] else: current_conda_env = os.environ.get("CONDA_DEFAULT_ENV") image_name = args.image_name or current_conda_env diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index b8b4188ac..b8d35ccdc 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -38,7 +38,7 @@ SERVER_DEPENDENCIES = [ class ImageType(Enum): - docker = "docker" + container = "container" conda = "conda" venv = "venv" @@ -77,8 +77,8 @@ def get_provider_dependencies( provider_spec = providers_for_api[provider_type] deps.extend(provider_spec.pip_packages) - if provider_spec.docker_image: - raise ValueError("A stack's dependencies cannot have a docker image") + if provider_spec.container_image: + raise ValueError("A stack's dependencies cannot have a container image") normal_deps = [] special_deps = [] @@ -109,23 +109,25 @@ def build_image( image_name: str, template_name: Optional[str] = None, ): - docker_image = build_config.distribution_spec.docker_image or "python:3.10-slim" + container_image = ( + build_config.distribution_spec.container_image or "python:3.10-slim" + ) normal_deps, special_deps = get_provider_dependencies( build_config.distribution_spec.providers ) normal_deps += SERVER_DEPENDENCIES - if build_config.image_type == ImageType.docker.value: + if build_config.image_type == ImageType.container.value: script = str( importlib.resources.files("llama_stack") / "distribution/build_container.sh" ) args = [ script, image_name, - docker_image, + container_image, str(build_file_path), - str(BUILDS_BASE_DIR / ImageType.docker.value), + str(BUILDS_BASE_DIR / ImageType.container.value), " ".join(normal_deps), ] elif build_config.image_type == ImageType.conda.value: diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 17902de0a..4c2425004 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -13,7 +13,7 @@ PYPI_VERSION=${PYPI_VERSION:-} BUILD_PLATFORM=${BUILD_PLATFORM:-} if [ "$#" -lt 4 ]; then - echo "Usage: $0 [ ]" >&2 + echo "Usage: $0 [ ]" >&2 echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2 exit 1 fi @@ -24,7 +24,7 @@ set -euo pipefail build_name="$1" image_name="distribution-$build_name" -docker_base=$2 +container_base=$2 build_file_path=$3 host_build_dir=$4 pip_dependencies=$5 @@ -36,14 +36,14 @@ NC='\033[0m' # No Color SCRIPT_DIR=$(dirname "$(readlink -f "$0")") REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR")) 
-DOCKER_BINARY=${DOCKER_BINARY:-docker} -DOCKER_OPTS=${DOCKER_OPTS:-} +CONTAINER_BINARY=${CONTAINER_BINARY:-docker} +CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) -add_to_docker() { +add_to_container() { local input - output_file="$TEMP_DIR/Dockerfile" + output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then printf '%s\n' "$1" >>"$output_file" else @@ -53,9 +53,9 @@ add_to_docker() { } # Update and install UBI9 components if UBI9 base image is used -if [[ $docker_base == *"registry.access.redhat.com/ubi9"* ]]; then - add_to_docker << EOF -FROM $docker_base +if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then + add_to_container << EOF +FROM $container_base WORKDIR /app RUN microdnf -y update && microdnf install -y iputils net-tools wget \ @@ -64,8 +64,8 @@ RUN microdnf -y update && microdnf install -y iputils net-tools wget \ EOF else - add_to_docker << EOF -FROM $docker_base + add_to_container << EOF +FROM $container_base WORKDIR /app RUN apt-get update && apt-get install -y \ @@ -82,7 +82,7 @@ fi # Add pip dependencies first since llama-stack is what will change most often # so we can reuse layers. if [ -n "$pip_dependencies" ]; then - add_to_docker << EOF + add_to_container << EOF RUN pip install --no-cache $pip_dependencies EOF fi @@ -90,7 +90,7 @@ fi if [ -n "$special_pip_deps" ]; then IFS='#' read -ra parts <<<"$special_pip_deps" for part in "${parts[@]}"; do - add_to_docker < /dev/null && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable" + CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" fi # Set version tag based on PyPI version @@ -200,7 +200,7 @@ else fi set -x -$DOCKER_BINARY build $DOCKER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts +$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Containerfile" "$REPO_DIR" $mounts # clean up tmp/configs set +x diff --git a/llama_stack/distribution/configure_container.sh b/llama_stack/distribution/configure_container.sh index 5f64531eb..b01251e46 100755 --- a/llama_stack/distribution/configure_container.sh +++ b/llama_stack/distribution/configure_container.sh @@ -6,8 +6,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-DOCKER_BINARY=${DOCKER_BINARY:-docker} -DOCKER_OPTS=${DOCKER_OPTS:-} +CONTAINER_BINARY=${CONTAINER_BINARY:-docker} +CONTAINER_OPTS=${CONTAINER_OPTS:-} LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-} set -euo pipefail @@ -24,13 +24,13 @@ if [ $# -lt 2 ]; then exit 1 fi -docker_image="$1" +container_image="$1" host_build_dir="$2" container_build_dir="/app/builds" if command -v selinuxenabled &> /dev/null && selinuxenabled; then # Disable SELinux labels - DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable" + CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" fi mounts="" @@ -39,9 +39,9 @@ if [ -n "$LLAMA_STACK_DIR" ]; then fi set -x -$DOCKER_BINARY run $DOCKER_OPTS -it \ +$CONTAINER_BINARY run $CONTAINER_OPTS -it \ --entrypoint "/usr/local/bin/llama" \ -v $host_build_dir:$container_build_dir \ $mounts \ - $docker_image \ + $container_image \ stack configure ./llamastack-build.yaml --output-dir $container_build_dir diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 0a293cbc2..c1a91cf6c 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -73,7 +73,7 @@ class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" config_class: str = "" - docker_image: Optional[str] = None + container_image: Optional[str] = None routing_table_api: Api module: str provider_data_validator: Optional[str] = Field( @@ -89,7 +89,7 @@ class AutoRoutedProviderSpec(ProviderSpec): class RoutingTableProviderSpec(ProviderSpec): provider_type: str = "routing_table" config_class: str = "" - docker_image: Optional[str] = None + container_image: Optional[str] = None router_api: Api module: str @@ -101,7 +101,7 @@ class DistributionSpec(BaseModel): default="", description="Description of the distribution", ) - docker_image: Optional[str] = None + container_image: Optional[str] = None providers: Dict[str, Union[str, List[str]]] = Field( default_factory=dict, description=""" @@ -127,9 +127,9 @@ Reference to the distribution this package refers to. 
For unregistered (adhoc) p this could be just a hash """, ) - docker_image: Optional[str] = Field( + container_image: Optional[str] = Field( default=None, - description="Reference to the docker image if this package refers to a container", + description="Reference to the container image if this package refers to a container", ) apis: List[str] = Field( default_factory=list, @@ -168,5 +168,5 @@ class BuildConfig(BaseModel): ) image_type: str = Field( default="conda", - description="Type of package to build (conda | docker | venv)", + description="Type of package to build (conda | container | venv)", ) diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index d7e947a46..204555b16 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -145,7 +145,9 @@ async def resolve_impls( log.warning( f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {p.deprecation_warning}", ) - p.deps__ = [a.value for a in p.api_dependencies] + p.deps__ = [a.value for a in p.api_dependencies] + [ + a.value for a in p.optional_api_dependencies + ] spec = ProviderWithSpec( spec=p, **(provider.model_dump()), @@ -229,6 +231,9 @@ async def resolve_impls( inner_impls_by_provider_id = {f"inner-{x.value}": {} for x in router_apis} for api_str, provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies} + for a in provider.spec.optional_api_dependencies: + if a in impls: + deps[a] = impls[a] inner_impls = {} if isinstance(provider.spec, RoutingTableProviderSpec): @@ -265,7 +270,7 @@ def topological_sort( deps.append(dep) for dep in deps: - if dep not in visited: + if dep not in visited and dep in providers_with_specs: dfs((dep, providers_with_specs[dep]), visited, stack) stack.append(api_str) diff --git a/llama_stack/distribution/start_container.sh b/llama_stack/distribution/start_container.sh index 3b49a22f8..1a55bf96d 100755 --- a/llama_stack/distribution/start_container.sh +++ b/llama_stack/distribution/start_container.sh @@ -6,8 +6,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-DOCKER_BINARY=${DOCKER_BINARY:-docker} -DOCKER_OPTS=${DOCKER_OPTS:-} +CONTAINER_BINARY=${CONTAINER_BINARY:-docker} +CONTAINER_OPTS=${CONTAINER_OPTS:-} LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-} LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-} TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-} @@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then fi build_name="$1" -docker_image="localhost/distribution-$build_name" +container_image="localhost/distribution-$build_name" shift yaml_config="$1" @@ -64,7 +64,7 @@ set -x if command -v selinuxenabled &> /dev/null && selinuxenabled; then # Disable SELinux labels - DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable" + CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" fi mounts="" @@ -73,7 +73,7 @@ if [ -n "$LLAMA_STACK_DIR" ]; then fi if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama" - DOCKER_OPTS="$DOCKER_OPTS --gpus=all" + CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all" fi version_tag="latest" @@ -85,11 +85,11 @@ elif [ -n "$TEST_PYPI_VERSION" ]; then version_tag="test-$TEST_PYPI_VERSION" fi -$DOCKER_BINARY run $DOCKER_OPTS -it \ +$CONTAINER_BINARY run $CONTAINER_OPTS -it \ -p $port:$port \ $env_vars \ -v "$yaml_config:/app/config.yaml" \ $mounts \ --env LLAMA_STACK_PORT=$port \ --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \ - $docker_image:$version_tag + $container_image:$version_tag diff --git a/llama_stack/distribution/ui/page/distribution/datasets.py b/llama_stack/distribution/ui/page/distribution/datasets.py index 44e314cde..b52356522 100644 --- a/llama_stack/distribution/ui/page/distribution/datasets.py +++ b/llama_stack/distribution/ui/page/distribution/datasets.py @@ -14,6 +14,6 @@ def datasets(): datasets_info = { d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list() } - - selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys())) - st.json(datasets_info[selected_dataset], expanded=True) + if len(datasets_info) > 0: + selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys())) + st.json(datasets_info[selected_dataset], expanded=True) diff --git a/llama_stack/distribution/ui/page/distribution/eval_tasks.py b/llama_stack/distribution/ui/page/distribution/eval_tasks.py index 4957fb178..cc7912838 100644 --- a/llama_stack/distribution/ui/page/distribution/eval_tasks.py +++ b/llama_stack/distribution/ui/page/distribution/eval_tasks.py @@ -16,7 +16,8 @@ def eval_tasks(): d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list() } - selected_eval_task = st.selectbox( - "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect" - ) - st.json(eval_tasks_info[selected_eval_task], expanded=True) + if len(eval_tasks_info) > 0: + selected_eval_task = st.selectbox( + "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect" + ) + st.json(eval_tasks_info[selected_eval_task], expanded=True) diff --git a/llama_stack/distribution/ui/page/distribution/providers.py b/llama_stack/distribution/ui/page/distribution/providers.py index 69f6bd771..9aeb7f2a5 100644 --- a/llama_stack/distribution/ui/page/distribution/providers.py +++ b/llama_stack/distribution/ui/page/distribution/providers.py @@ -10,11 +10,17 @@ from modules.api import llama_stack_api def providers(): st.header("🔍 API Providers") - apis_providers_info = llama_stack_api.client.providers.list() - # selected_api = st.selectbox("Select an API", list(apis_providers_info.keys())) - for api in 
apis_providers_info.keys(): + apis_providers_lst = llama_stack_api.client.providers.list() + api_to_providers = {} + for api_provider in apis_providers_lst: + if api_provider.api in api_to_providers: + api_to_providers[api_provider.api].append(api_provider) + else: + api_to_providers[api_provider.api] = [api_provider] + + for api in api_to_providers.keys(): st.markdown(f"###### {api}") - st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500) + st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500) providers() diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/distribution/ui/page/playground/chat.py index 5d91ec819..cb9990b7c 100644 --- a/llama_stack/distribution/ui/page/playground/chat.py +++ b/llama_stack/distribution/ui/page/playground/chat.py @@ -121,7 +121,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"): if stream: for chunk in response: if chunk.event.event_type == "progress": - full_response += chunk.event.delta + full_response += chunk.event.delta.text message_placeholder.markdown(full_response + "▌") message_placeholder.markdown(full_response) else: diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 3a2ba1270..11b05718d 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -44,14 +44,21 @@ def rag_chat_page(): ] providers = llama_stack_api.client.providers.list() + memory_provider = None + + for x in providers: + if x.api == "memory": + memory_provider = x.provider_id + llama_stack_api.client.memory_banks.register( memory_bank_id=memory_bank_name, # Use the user-provided name params={ + "memory_bank_type": "vector", "embedding_model": "all-MiniLM-L6-v2", "chunk_size_in_tokens": 512, "overlap_size_in_tokens": 64, }, - provider_id=providers["memory"][0].provider_id, + provider_id=memory_provider, ) # insert documents using the custom bank name @@ -69,9 +76,6 @@ def rag_chat_page(): "Select Memory Banks", memory_banks, ) - memory_bank_configs = [ - {"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks - ] available_models = llama_stack_api.client.models.list() available_models = [ @@ -133,14 +137,13 @@ def rag_chat_page(): sampling_params={ "strategy": strategy, }, - tools=[ - { - "type": "memory", - "memory_bank_configs": memory_bank_configs, - "query_generator_config": {"type": "default", "sep": " "}, - "max_tokens_in_context": 4096, - "max_chunks": 10, - } + toolgroups=[ + dict( + name="builtin::memory", + args={ + "memory_bank_ids": [bank_id for bank_id in selected_memory_banks], + }, + ) ], tool_choice="auto", tool_prompt_format="json", @@ -179,7 +182,7 @@ def rag_chat_page(): retrieval_response = "" for log in EventLogger().log(response): log.print() - if log.role == "memory_retrieval": + if log.role == "tool_execution": retrieval_response += log.content.replace("====", "").strip() retrieval_message_placeholder.info(retrieval_response) else: diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index ce0c9f52e..94563879c 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -96,6 +96,9 @@ class ProviderSpec(BaseModel): default_factory=list, description="Higher-level API surfaces may depend on other providers to provide their functionality", ) + optional_api_dependencies: List[Api] = Field( + default_factory=list, + ) deprecation_warning: Optional[str] = Field( default=None, 
description="If this provider is deprecated, specify the warning message here", @@ -147,11 +150,11 @@ class InlineProviderSpec(ProviderSpec): default_factory=list, description="The pip dependencies needed for this implementation", ) - docker_image: Optional[str] = Field( + container_image: Optional[str] = Field( default=None, description=""" -The docker image to use for this implementation. If one is provided, pip_packages will be ignored. -If a provider depends on other providers, the dependencies MUST NOT specify a docker image. +The container image to use for this implementation. If one is provided, pip_packages will be ignored. +If a provider depends on other providers, the dependencies MUST NOT specify a container image. """, ) module: str = Field( @@ -194,7 +197,7 @@ API responses, specify the adapter here. ) @property - def docker_image(self) -> Optional[str]: + def container_image(self) -> Optional[str]: return None @property diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 4875f8cf0..aeeed1ac0 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -72,7 +72,7 @@ def is_tracing_enabled(tracer): class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None: self.config = config - self.datasetio_api = deps[Api.datasetio] + self.datasetio_api = deps.get(Api.datasetio) resource = Resource.create( { diff --git a/llama_stack/providers/registry/telemetry.py b/llama_stack/providers/registry/telemetry.py index ba7e2f806..f3b41374c 100644 --- a/llama_stack/providers/registry/telemetry.py +++ b/llama_stack/providers/registry/telemetry.py @@ -24,7 +24,7 @@ def available_providers() -> List[ProviderSpec]: "opentelemetry-sdk", "opentelemetry-exporter-otlp-proto-http", ], - api_dependencies=[Api.datasetio], + optional_api_dependencies=[Api.datasetio], module="llama_stack.providers.inline.telemetry.meta_reference", config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", ), diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 317d05207..1dbb4ecfa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -176,7 +176,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): media_present = request_has_media(request) if isinstance(request, ChatCompletionRequest): if media_present: - # vllm does not seem to work well with image urls, so we download the images input_dict["messages"] = [ await convert_message_to_openai_dict(m, download=True) for m in request.messages diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index f6350ed51..127fd19f3 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -6,15 +6,15 @@ from typing import AsyncGenerator, Dict, List, Optional -from llama_models.llama3.api.chat_format import ChatFormat - -from llama_models.llama3.api.datatypes import ( +from llama_models.datatypes import ( GreedySamplingStrategy, SamplingParams, - StopReason, TopKSamplingStrategy, TopPSamplingStrategy, ) + +from llama_models.llama3.api.chat_format import ChatFormat +from 
llama_models.llama3.api.datatypes import StopReason from pydantic import BaseModel from llama_stack.apis.common.content_types import ( diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 7ee19fd7b..701b2ca3b 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -188,7 +188,7 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]: async def convert_image_content_to_url( media: ImageContentItem, download: bool = False, include_format: bool = True ) -> str: - if media.url and not download: + if media.url and (not download or media.url.uri.startswith("data")): return media.url.uri content, format = await localize_image_content(media) diff --git a/llama_stack/providers/utils/telemetry/dataset_mixin.py b/llama_stack/providers/utils/telemetry/dataset_mixin.py index 6806f39aa..a2bfdcb87 100644 --- a/llama_stack/providers/utils/telemetry/dataset_mixin.py +++ b/llama_stack/providers/utils/telemetry/dataset_mixin.py @@ -22,6 +22,9 @@ class TelemetryDatasetMixin: dataset_id: str, max_depth: Optional[int] = None, ) -> None: + if self.datasetio_api is None: + raise RuntimeError("DatasetIO API not available") + spans = await self.query_spans( attribute_filters=attribute_filters, attributes_to_return=attributes_to_save, diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index c80625cf6..668134be8 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -30,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "bedrock" @@ -70,7 +71,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use AWS Bedrock for running LLM inference and safety", - docker_image=None, + container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=default_models, diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 794e54306..95b8684e3 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 3a6922ae7..118723bbc 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -81,6 +81,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index df3b55ddd..8f6bd77af 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -92,7 +92,7 @@ def get_distribution_template() -> DistributionTemplate: name="cerebras", distro_type="self_hosted", description="Use Cerebras for running LLM inference", - docker_image=None, + 
container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=default_models, diff --git a/llama_stack/templates/experimental-post-training/build.yaml b/llama_stack/templates/experimental-post-training/build.yaml index 4997ab8a3..618e8ff97 100644 --- a/llama_stack/templates/experimental-post-training/build.yaml +++ b/llama_stack/templates/experimental-post-training/build.yaml @@ -2,7 +2,7 @@ version: '2' name: experimental-post-training distribution_spec: description: Experimental template for post training - docker_image: null + container_image: null providers: inference: - inline::meta-reference diff --git a/llama_stack/templates/experimental-post-training/run.yaml b/llama_stack/templates/experimental-post-training/run.yaml index 2e0ee029b..87465137f 100644 --- a/llama_stack/templates/experimental-post-training/run.yaml +++ b/llama_stack/templates/experimental-post-training/run.yaml @@ -1,6 +1,6 @@ version: '2' image_name: experimental-post-training -docker_image: null +container_image: null conda_env: experimental-post-training apis: - agents diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 504c913bd..d8e1e27ee 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 8add75f7d..14fd392c4 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } @@ -98,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use Fireworks.AI for running LLM inference", - docker_image=None, + container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=default_models, diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 8fefbd98a..dd21120ed 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -92,6 +92,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 53128f456..993417b50 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -86,6 +86,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index 43486030e..f4fdc4a3d 100644 --- 
a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 54aaa56ac..1a5c23a42 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "hf-endpoint" @@ -88,7 +89,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", - docker_image=None, + container_image=None, template_path=None, providers=providers, default_models=[inference_model, safety_model], diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 6a52ca861..537e4024f 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -91,6 +91,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index c019c587a..b31f28434 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -86,6 +86,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index e1328bd58..d075a7449 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index 788faa986..0292f13e2 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } @@ -89,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", - docker_image=None, + container_image=None, template_path=None, providers=providers, default_models=[inference_model, safety_model], diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml 
b/llama_stack/templates/hf-serverless/run-with-safety.yaml index 0a64de358..484b2d0bd 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -91,6 +91,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index f04213533..a75baf1f9 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -86,6 +86,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 9ad7b26bf..a75d3604b 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 7364ee422..584d38256 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -38,6 +38,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "meta-reference-gpu" diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 591afa2be..9dbdb6fa5 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -93,6 +93,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index ecde69fdf..6465215f0 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -87,6 +87,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index e6b64ea1e..4c3e2f492 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ 
b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index 5c40134af..56293f42c 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } default_tool_groups = [ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index ff0affafb..059034741 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -89,6 +89,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 56124552b..7bd2a3865 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -26,4 +26,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index cfa86dbe7..e72fe359f 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -29,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } @@ -68,7 +69,7 @@ def get_distribution_template() -> DistributionTemplate: name="nvidia", distro_type="remote_hosted", description="Use NVIDIA NIM for running LLM inference", - docker_image=None, + container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=default_models, diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 578f70c9d..07c901371 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -83,6 +83,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 0473f8692..2288ea3a6 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -90,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use (an external) Ollama server for running LLM inference", - docker_image=None, + container_image=None, 
template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=[inference_model, safety_model], diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 2659c8190..6f301914c 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -12,6 +12,15 @@ distribution_spec: - inline::llama-guard agents: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust telemetry: - inline::meta-reference tool_runtime: @@ -19,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 4bf73bbda..5e5bd6af6 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -2,9 +2,12 @@ version: '2' image_name: remote-vllm apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry - tool_runtime providers: @@ -44,6 +47,28 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -68,6 +93,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index c35694d73..4eac4dad7 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -2,9 +2,12 @@ version: '2' image_name: remote-vllm apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry - tool_runtime providers: @@ -38,6 +41,28 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,6 +87,9 @@ providers: - 
provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 9dcaf2414..296e2b4f5 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -27,12 +27,16 @@ def get_distribution_template() -> DistributionTemplate: "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], "telemetry": ["inline::meta-reference"], "tool_runtime": [ "remote::brave-search", "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "remote-vllm" diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 7f67bd793..78f57b795 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -37,7 +37,7 @@ class RunConfigSettings(BaseModel): self, name: str, providers: Dict[str, List[str]], - docker_image: Optional[str] = None, + container_image: Optional[str] = None, ) -> StackRunConfig: provider_registry = get_provider_registry() @@ -83,7 +83,7 @@ class RunConfigSettings(BaseModel): return StackRunConfig( image_name=name, - docker_image=docker_image, + container_image=container_image, apis=apis, providers=provider_configs, metadata_store=SqliteKVStoreConfig.sample_run_config( @@ -112,7 +112,7 @@ class DistributionTemplate(BaseModel): # Optional configuration run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None - docker_image: Optional[str] = None + container_image: Optional[str] = None default_models: Optional[List[ModelInput]] = None @@ -121,7 +121,7 @@ class DistributionTemplate(BaseModel): name=self.name, distribution_spec=DistributionSpec( description=self.description, - docker_image=self.docker_image, + container_image=self.container_image, providers=self.providers, ), image_type="conda", # default to conda, can be overridden @@ -169,7 +169,7 @@ class DistributionTemplate(BaseModel): for yaml_pth, settings in self.run_configs.items(): run_config = settings.run_config( - self.name, self.providers, self.docker_image + self.name, self.providers, self.container_image ) with open(yaml_output_dir / yaml_pth, "w") as f: yaml.safe_dump( diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 3bcacffb0..4391ddd5d 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 070daedc1..9bd06d650 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -86,6 +86,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} 
metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index e9696c584..2fc1b52d9 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -85,6 +85,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index b62e7719e..8ad9725e3 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "tgi" @@ -92,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use (an external) TGI server for running LLM inference", - docker_image=None, + container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=[inference_model, safety_model], diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index ad970f405..ea7387a24 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -28,4 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::memory-runtime + - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 4e162aab3..c1461d75d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -92,6 +92,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 3c4844447..da25fd144 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -86,6 +86,9 @@ providers: - provider_id: memory-runtime provider_type: inline::memory-runtime config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index b51918a6c..1e2def3bd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate: "remote::tavily-search", "inline::code-interpreter", "inline::memory-runtime", + "remote::model-context-protocol", ], } name = "together" @@ -96,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate: name=name, distro_type="self_hosted", description="Use Together.AI for running LLM inference", - docker_image=None, + container_image=None, 
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         default_models=default_models,
diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml
index e068fa97e..e8a1693d0 100644
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@@ -28,4 +28,5 @@ distribution_spec:
     - remote::tavily-search
     - inline::code-interpreter
     - inline::memory-runtime
+    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index 1cb44b052..cc0ff047f 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -89,6 +89,9 @@ providers:
   - provider_id: memory-runtime
     provider_type: inline::memory-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index dd80c15dc..71b24482d 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
             "remote::tavily-search",
             "inline::code-interpreter",
             "inline::memory-runtime",
+            "remote::model-context-protocol",
         ],
     }
 
@@ -84,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         name=name,
         distro_type="self_hosted",
         description="Use a built-in vLLM engine for running LLM inference",
-        docker_image=None,
+        container_image=None,
         template_path=None,
         providers=providers,
         default_models=[inference_model],
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index d6d88a34f..bfe279e24 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -182,7 +182,8 @@ def test_builtin_tool_web_search(llama_stack_client, agent_config):
     assert "tool_execution>" in logs_str
     assert "Tool:brave_search Response:" in logs_str
     assert "mark zuckerberg" in logs_str.lower()
-    assert "No Violation" in logs_str
+    if len(agent_config["output_shields"]) > 0:
+        assert "No Violation" in logs_str
 
 
 def test_builtin_tool_code_execution(llama_stack_client, agent_config):
diff --git a/tests/client-sdk/inference/dog.png b/tests/client-sdk/inference/dog.png
new file mode 100644
index 000000000..2d502e606
Binary files /dev/null and b/tests/client-sdk/inference/dog.png differ
diff --git a/tests/client-sdk/inference/test_inference.py b/tests/client-sdk/inference/test_inference.py
index 19314e4ab..ac2c4ce38 100644
--- a/tests/client-sdk/inference/test_inference.py
+++ b/tests/client-sdk/inference/test_inference.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import base64
+import os
+
 import pytest
 from pydantic import BaseModel
 
@@ -69,6 +72,16 @@ def get_weather_tool_definition():
     }
 
 
+@pytest.fixture
+def base64_image_url():
+    image_path = os.path.join(os.path.dirname(__file__), "dog.png")
+    with open(image_path, "rb") as image_file:
+        # Convert the image to base64
+        base64_string = base64.b64encode(image_file.read()).decode("utf-8")
+        base64_url = f"data:image;base64,{base64_string}"
+        return base64_url
+
+
 def test_completion_non_streaming(llama_stack_client, text_model_id):
     response = llama_stack_client.inference.completion(
         content="Complete the sentence using one word: Roses are red, violets are ",
@@ -356,3 +369,31 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
         streamed_content += chunk.event.delta.text.lower()
     assert len(streamed_content) > 0
     assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
+
+
+def test_image_chat_completion_base64_url(
+    llama_stack_client, vision_model_id, base64_image_url
+):
+
+    message = {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "url": {
+                    "uri": base64_image_url,
+                },
+            },
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=False,
+    )
+    message_content = response.completion_message.content.lower().strip()
+    assert len(message_content) > 0
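
Editor's note, not part of the patch: the new base64_image_url fixture and test_image_chat_completion_base64_url test above depend on the convert_image_content_to_url change earlier in this diff, which returns data: URIs as-is rather than trying to download them. Below is a minimal client-side sketch of the same flow, assuming a Llama Stack server is already running at the given base_url and serves a vision model; the port, image path, and model ID are placeholders to adjust for your own setup.

import base64

from llama_stack_client import LlamaStackClient

# Assumed endpoint of an already-running Llama Stack server (placeholder).
client = LlamaStackClient(base_url="http://localhost:5001")

# Encode a local image as a data URL, mirroring the base64_image_url fixture above.
with open("dog.png", "rb") as image_file:
    base64_string = base64.b64encode(image_file.read()).decode("utf-8")
data_url = f"data:image;base64,{base64_string}"

# Same message shape as test_image_chat_completion_base64_url.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",  # placeholder vision model
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image", "url": {"uri": data_url}},
                {"type": "text", "text": "Describe what is in this image."},
            ],
        }
    ],
    stream=False,
)
print(response.completion_message.content)

If the new test in this patch passes against your distribution, the same request should work here as well, since the server-side prompt adapter now forwards the data URL directly to providers such as remote vLLM instead of attempting a download.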