From 7918188f1ef42c52fa1d4f67811d85129892cb56 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 22 Oct 2025 14:59:42 -0700
Subject: [PATCH 1/4] fix(ci): enable responses tests in CI; suppress expected MCP auth error logs (#3889)

Let us enable the responses suite in CI now.

Also a minor fix: the MCP tool tests intentionally trigger authentication
failures to verify error handling, but the resulting error logs clutter the
test output, so they are now suppressed around the expected-to-fail call.
---
 .github/workflows/integration-tests.yml |  2 +-
 .../responses/test_tool_responses.py     | 21 ++++++++++++-------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 274c779ba..d38e8337b 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -61,7 +61,7 @@ jobs:
             && fromJSON('[{"setup": "vllm", "suite": "base"}]')
             || github.event.inputs.test-setup == 'ollama-vision'
             && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
           }}
     steps:
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index 2cff4d27d..3f1c35214 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import json
+import logging  # allow-direct-logging
 import os

 import httpx
@@ -198,7 +199,7 @@ def test_response_sequential_file_search(


 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
+def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")

@@ -245,13 +246,17 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
         if isinstance(compat_client, LlamaStackAsLibraryClient)
         else (httpx.HTTPStatusError, openai.AuthenticationError)
     )
-    with pytest.raises(exc_type):
-        compat_client.responses.create(
-            model=text_model_id,
-            input=case.input,
-            tools=tools,
-            stream=False,
-        )
+    # Suppress expected auth error logs only for the failing auth attempt
+    with caplog.at_level(
+        logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
+    ):
+        with pytest.raises(exc_type):
+            compat_client.responses.create(
+                model=text_model_id,
+                input=case.input,
+                tools=tools,
+                stream=False,
+            )

     for tool in tools:
         if tool["type"] == "mcp":

From 0e57233a0acb665e9614eda6ca7715a9e8f49e05 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 22 Oct 2025 15:26:35 -0700
Subject: [PATCH 2/4] chore(misc): update datasets, benchmarks to use alpha, beta prefixes (#3891)

This will land together with
https://github.com/llamastack/llama-stack-client-python/pull/282 (hence CI
will be red on this one). I have verified locally that the tests pass with
the updated version of the client SDK.
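For illustration, a minimal sketch of what call sites look like after this change (the namespaced method paths come from the diffs below; the client construction, ids, and source payload are assumptions):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical endpoint

# Datasets now live under the `beta` namespace.
dataset = client.beta.datasets.register(
    purpose="eval/messages-answer",
    source={"type": "uri", "uri": "https://example.com/rows.csv"},  # placeholder source
)

# Benchmarks (and eval) now live under the `alpha` namespace.
client.alpha.benchmarks.register(
    benchmark_id="my-benchmark",  # hypothetical id
    dataset_id=dataset.identifier,
    scoring_functions=["basic::equality"],
)
```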
---
 llama_stack/core/server/server.py           |  4 +++-
 tests/integration/datasets/test_datasets.py | 10 +++++-----
 tests/integration/eval/test_eval.py         | 14 +++++++-------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py
index dd21a72f9..845686f15 100644
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@@ -174,7 +174,9 @@ class StackApp(FastAPI):

 @asynccontextmanager
 async def lifespan(app: StackApp):
-    logger.info("Starting up")
+    server_version = parse_version("llama-stack")
+
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
diff --git a/tests/integration/datasets/test_datasets.py b/tests/integration/datasets/test_datasets.py
index 60db95f30..3ad5570f0 100644
--- a/tests/integration/datasets/test_datasets.py
+++ b/tests/integration/datasets/test_datasets.py
@@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str:
     ],
 )
 def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose=purpose,
         source=source,
     )
     assert dataset.identifier is not None
     assert dataset.provider_id == provider_id
-    iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
+    iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit)
     assert len(iterrow_response.data) == limit

-    dataset_list = llama_stack_client.datasets.list()
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier in [d.identifier for d in dataset_list]

-    llama_stack_client.datasets.unregister(dataset.identifier)
-    dataset_list = llama_stack_client.datasets.list()
+    llama_stack_client.beta.datasets.unregister(dataset.identifier)
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier not in [d.identifier for d in dataset_list]
diff --git a/tests/integration/eval/test_eval.py b/tests/integration/eval/test_eval.py
index 01581e829..98b3302e0 100644
--- a/tests/integration/eval/test_eval.py
+++ b/tests/integration/eval/test_eval.py
@@ -17,17 +17,17 @@ from ..datasets.test_datasets import data_url_from_file

 @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
 def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
             "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
-    response = llama_stack_client.datasets.list()
+    response = llama_stack_client.beta.datasets.list()
     assert any(x.identifier == dataset.identifier for x in response)

-    rows = llama_stack_client.datasets.iterrows(
+    rows = llama_stack_client.beta.datasets.iterrows(
         dataset_id=dataset.identifier,
         limit=3,
     )
@@ -37,12 +37,12 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
         scoring_fn_id,
     ]
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=scoring_functions,
     )
-    list_benchmarks = llama_stack_client.benchmarks.list()
+    list_benchmarks = llama_stack_client.alpha.benchmarks.list()
     assert any(x.identifier == benchmark_id for x in list_benchmarks)

     response = llama_stack_client.alpha.eval.evaluate_rows(
@@ -66,7 +66,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):

 @pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
 def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
@@ -74,7 +74,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         },
     )
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=[scoring_fn_id],

From 658fb2c777c26c1e127407bf3cd2e48a8d90f2f0 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 22 Oct 2025 15:33:07 -0700
Subject: [PATCH 3/4] refactor(k8s): update run configs to v2 storage and registered_resources structure

Migrates the k8s run configs to the updated run config structure:

- Replace storage.references with storage.stores
- Wrap resources under a registered_resources section
- Update provider configs to use persistence with namespace/backend
- Add telemetry and vector_stores top-level sections
- Simplify the agent/files metadata store configuration
---
 .../k8s-benchmark/stack_run_config.yaml     | 100 +++++++++--------
 .../distributions/k8s/stack_run_config.yaml | 106 +++++++++---------
 2 files changed, 107 insertions(+), 99 deletions(-)

diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
index 2ccaa21aa..06a481f43 100644
--- a/benchmarking/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -27,28 +27,24 @@ providers:
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   vector_io:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -58,20 +54,15 @@
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
@@ -112,32 +103,45 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-    model_id: nomic-embed-text-v1.5
-    provider_id: sentence-transformers
-    model_type: embedding
-- model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+  - model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8323
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml
index 863565fdf..3c74fd436 100644
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@@ -32,21 +32,17 @@ providers:
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -56,20 +52,15 @@
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
@@ -110,40 +101,53 @@
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-    model_id: nomic-embed-text-v1.5
-    provider_id: sentence-transformers
-    model_type: embedding
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-  provider_id: vllm-safety
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    provider_id: vllm-safety
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
   auth:
     provider_config:
       type: github_token
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5

From 5c04d2e0a798e701a85a3f6000f50f1c405a738a Mon Sep 17 00:00:00 2001
From: Eric Huang
Date: Thu, 23 Oct 2025 12:27:19 -0700
Subject: [PATCH 4/4] chore(telemetry): add an argument to select container runtime explicitly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
Adds a `--container` (`-c`) flag to `scripts/telemetry/setup_telemetry.sh` so
the container runtime (docker or podman) can be chosen explicitly instead of
relying only on auto-detection.
## Test Plan

❯ ./scripts/telemetry/setup_telemetry.sh --container docker
---
 scripts/telemetry/setup_telemetry.sh | 61 ++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 8 deletions(-)

diff --git a/scripts/telemetry/setup_telemetry.sh b/scripts/telemetry/setup_telemetry.sh
index ecdd56175..ab855e8db 100755
--- a/scripts/telemetry/setup_telemetry.sh
+++ b/scripts/telemetry/setup_telemetry.sh
@@ -16,14 +16,59 @@

 set -Eeuo pipefail

-if command -v podman &> /dev/null; then
-  CONTAINER_RUNTIME="podman"
-elif command -v docker &> /dev/null; then
-  CONTAINER_RUNTIME="docker"
-else
-  echo "🚨 Neither Podman nor Docker could be found"
-  echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
-  exit 1
+# Parse arguments
+CONTAINER_RUNTIME=""
+
+print_usage() {
+  echo "Usage: $0 [--container docker|podman]"
+  echo ""
+  echo "Options:"
+  echo "  -c, --container  Choose container runtime (docker or podman)."
+  echo "  -h, --help       Show this help."
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -c|--container)
+      if [[ $# -lt 2 ]]; then
+        echo "🚨 --container requires a value: docker or podman"
+        exit 1
+      fi
+      case "$2" in
+        docker|podman)
+          CONTAINER_RUNTIME="$2"
+          shift 2
+          ;;
+        *)
+          echo "🚨 Invalid container runtime: $2"
+          echo "Valid options are: docker, podman"
+          exit 1
+          ;;
+      esac
+      ;;
+    -h|--help)
+      print_usage
+      exit 0
+      ;;
+    *)
+      echo "🚨 Unknown argument: $1"
+      print_usage
+      exit 1
+      ;;
+  esac
+done
+
+# Detect container runtime if not specified
+if [[ -z "$CONTAINER_RUNTIME" ]]; then
+  if command -v podman &> /dev/null; then
+    CONTAINER_RUNTIME="podman"
+  elif command -v docker &> /dev/null; then
+    CONTAINER_RUNTIME="docker"
+  else
+    echo "🚨 Neither Podman nor Docker could be found"
+    echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
+    exit 1
+  fi
 fi

 echo "🚀 Setting up telemetry stack for Llama Stack using $CONTAINER_RUNTIME..."
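For reference, a few invocations the new argument parsing supports (derived from the options defined in the script above):

```sh
# Explicitly select a runtime instead of relying on auto-detection.
./scripts/telemetry/setup_telemetry.sh --container docker
./scripts/telemetry/setup_telemetry.sh -c podman

# Print the usage text.
./scripts/telemetry/setup_telemetry.sh --help

# With no flag, the script auto-detects (podman first, then docker).
./scripts/telemetry/setup_telemetry.sh
```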