diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 274c779ba..d38e8337b 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -61,7 +61,7 @@ jobs:
           && fromJSON('[{"setup": "vllm", "suite": "base"}]')
           || github.event.inputs.test-setup == 'ollama-vision'
           && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-          || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+          || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
           }}
     steps:
diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
index 2ccaa21aa..06a481f43 100644
--- a/benchmarking/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -27,28 +27,24 @@ providers:
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   vector_io:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -58,20 +54,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -112,32 +103,45 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-  model_id: nomic-embed-text-v1.5
-  provider_id: sentence-transformers
-  model_type: embedding
-- model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  - model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8323
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
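The config migration above replaces each provider's inline SQLite/Postgres block with a small reference (`namespace`/`table_name` plus a `backend` name) that points into the shared `storage.backends` section, so connection details are declared once. A minimal sketch of that resolution idea, assuming hypothetical names (`SqlStoreReference` and `resolve_backend` are illustrative, not llama-stack's actual classes):

```python
# Illustrative sketch of store-reference resolution (not llama-stack's code):
# a store carries only a table name and the name of a shared backend.
from dataclasses import dataclass


@dataclass
class SqlStoreReference:
    table_name: str
    backend: str  # key of an entry under storage.backends


def resolve_backend(ref: SqlStoreReference, backends: dict[str, dict]) -> dict:
    """Look up the shared backend config that a store reference points to."""
    if ref.backend not in backends:
        raise ValueError(f"unknown storage backend: {ref.backend!r}")
    return backends[ref.backend]


backends = {
    "sql_default": {"type": "postgres", "host": "localhost", "port": 5432},
    "kv_default": {"type": "postgres", "host": "localhost", "port": 5432},
}
files_metadata = SqlStoreReference(table_name="files_metadata", backend="sql_default")
print(resolve_backend(files_metadata, backends))  # -> the shared postgres config
```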
diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml
index 863565fdf..3c74fd436 100644
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@@ -32,21 +32,17 @@ providers:
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -56,20 +52,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -110,40 +101,53 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-  model_id: nomic-embed-text-v1.5
-  provider_id: sentence-transformers
-  model_type: embedding
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-  provider_id: vllm-safety
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    provider_id: vllm-safety
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
   auth:
     provider_config:
       type: github_token
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
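Both run configs lean on the `${env.VAR:=default}` substitution syntax for every tunable value. A minimal sketch of just the `:=` (default) form; the configs also use the `:+` conditional form (e.g. `${env.ENABLE_CHROMADB:+chromadb}`), which this sketch deliberately does not handle, and it is not llama-stack's actual resolver:

```python
# Minimal sketch of ${env.VAR:=default} substitution (":=" form only).
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):=(?P<default>[^}]*)\}")


def substitute_env(value: str) -> str:
    """Replace ${env.NAME:=default} with $NAME, falling back to the default."""
    return _ENV_PATTERN.sub(
        lambda m: os.environ.get(m.group("name"), m.group("default")), value
    )


print(substitute_env("${env.POSTGRES_HOST:=localhost}"))  # "localhost" unless POSTGRES_HOST is set
print(substitute_env("${env.CHROMADB_URL:=}"))            # empty string when CHROMADB_URL is unset
```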
diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py
index dd21a72f9..845686f15 100644
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@@ -174,7 +174,9 @@ class StackApp(FastAPI):
 
 @asynccontextmanager
 async def lifespan(app: StackApp):
-    logger.info("Starting up")
+    server_version = parse_version("llama-stack")
+
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
diff --git a/scripts/telemetry/setup_telemetry.sh b/scripts/telemetry/setup_telemetry.sh
index ecdd56175..ab855e8db 100755
--- a/scripts/telemetry/setup_telemetry.sh
+++ b/scripts/telemetry/setup_telemetry.sh
@@ -16,14 +16,59 @@
 
 set -Eeuo pipefail
 
-if command -v podman &> /dev/null; then
-  CONTAINER_RUNTIME="podman"
-elif command -v docker &> /dev/null; then
-  CONTAINER_RUNTIME="docker"
-else
-  echo "🚨 Neither Podman nor Docker could be found"
-  echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
-  exit 1
+# Parse arguments
+CONTAINER_RUNTIME=""
+
+print_usage() {
+  echo "Usage: $0 [--container docker|podman]"
+  echo ""
+  echo "Options:"
+  echo "  -c, --container  Choose container runtime (docker or podman)."
+  echo "  -h, --help       Show this help."
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -c|--container)
+      if [[ $# -lt 2 ]]; then
+        echo "🚨 --container requires a value: docker or podman"
+        exit 1
+      fi
+      case "$2" in
+        docker|podman)
+          CONTAINER_RUNTIME="$2"
+          shift 2
+          ;;
+        *)
+          echo "🚨 Invalid container runtime: $2"
+          echo "Valid options are: docker, podman"
+          exit 1
+          ;;
+      esac
+      ;;
+    -h|--help)
+      print_usage
+      exit 0
+      ;;
+    *)
+      echo "🚨 Unknown argument: $1"
+      print_usage
+      exit 1
+      ;;
+  esac
+done
+
+# Detect container runtime if not specified
+if [[ -z "$CONTAINER_RUNTIME" ]]; then
+  if command -v podman &> /dev/null; then
+    CONTAINER_RUNTIME="podman"
+  elif command -v docker &> /dev/null; then
+    CONTAINER_RUNTIME="docker"
+  else
+    echo "🚨 Neither Podman nor Docker could be found"
+    echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
+    exit 1
+  fi
 fi
 
 echo "🚀 Setting up telemetry stack for Llama Stack using $CONTAINER_RUNTIME..."
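In Python terms, the selection logic the telemetry script now implements reads roughly as follows: honor an explicit, validated choice first, otherwise fall back to whichever runtime is on PATH. A hedged sketch, not part of the PR; `pick_container_runtime` is an illustrative name:

```python
# Sketch of the script's runtime-selection logic: explicit flag wins,
# otherwise auto-detect podman, then docker.
import shutil


def pick_container_runtime(explicit: str | None = None) -> str:
    if explicit is not None:
        if explicit not in ("docker", "podman"):
            raise ValueError(f"invalid container runtime: {explicit!r}")
        return explicit
    for candidate in ("podman", "docker"):
        if shutil.which(candidate):
            return candidate
    raise RuntimeError("Neither Podman nor Docker could be found")
```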
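The `server.py` hunk above logs the installed package version at startup via `parse_version("llama-stack")`. The diff does not show that helper's definition; one plausible backing, assuming it reads installed-package metadata, is `importlib.metadata` (a sketch under that assumption, not the PR's actual implementation):

```python
# Hedged sketch: parse_version-style startup logging backed by
# importlib.metadata; the real helper in this PR may differ.
from importlib.metadata import PackageNotFoundError, version


def parse_version(package: str) -> str:
    """Return the installed version of `package`, or 'unknown' if absent."""
    try:
        return version(package)
    except PackageNotFoundError:
        return "unknown"


print(f"Starting up Llama Stack server (version: {parse_version('llama-stack')})")
```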
diff --git a/tests/integration/datasets/test_datasets.py b/tests/integration/datasets/test_datasets.py
index 60db95f30..3ad5570f0 100644
--- a/tests/integration/datasets/test_datasets.py
+++ b/tests/integration/datasets/test_datasets.py
@@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str:
     ],
 )
 def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose=purpose,
         source=source,
     )
     assert dataset.identifier is not None
     assert dataset.provider_id == provider_id
-    iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
+    iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit)
     assert len(iterrow_response.data) == limit
 
-    dataset_list = llama_stack_client.datasets.list()
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier in [d.identifier for d in dataset_list]
 
-    llama_stack_client.datasets.unregister(dataset.identifier)
-    dataset_list = llama_stack_client.datasets.list()
+    llama_stack_client.beta.datasets.unregister(dataset.identifier)
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier not in [d.identifier for d in dataset_list]
diff --git a/tests/integration/eval/test_eval.py b/tests/integration/eval/test_eval.py
index 01581e829..98b3302e0 100644
--- a/tests/integration/eval/test_eval.py
+++ b/tests/integration/eval/test_eval.py
@@ -17,17 +17,17 @@ from ..datasets.test_datasets import data_url_from_file
 
 @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
 def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
             "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
-    response = llama_stack_client.datasets.list()
+    response = llama_stack_client.beta.datasets.list()
     assert any(x.identifier == dataset.identifier for x in response)
 
-    rows = llama_stack_client.datasets.iterrows(
+    rows = llama_stack_client.beta.datasets.iterrows(
         dataset_id=dataset.identifier,
         limit=3,
     )
@@ -37,12 +37,12 @@
         scoring_fn_id,
     ]
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=scoring_functions,
     )
-    list_benchmarks = llama_stack_client.benchmarks.list()
+    list_benchmarks = llama_stack_client.alpha.benchmarks.list()
     assert any(x.identifier == benchmark_id for x in list_benchmarks)
 
     response = llama_stack_client.alpha.eval.evaluate_rows(
@@ -66,7 +66,7 @@
 
 @pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
 def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
@@ -74,7 +74,7 @@
         },
     )
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=[scoring_fn_id],
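The two test files above track the client-side API versioning: `datasets` now lives under the `beta` namespace, while `benchmarks` joins `eval` under `alpha`. The call-site change, shown standalone (the base URL is illustrative):

```python
# Before/after of the namespaced client calls these tests now use.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative URL

# Old:  client.datasets.list()
datasets = client.beta.datasets.list()

# Old:  client.benchmarks.list()
benchmarks = client.alpha.benchmarks.list()
```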
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index 2cff4d27d..3f1c35214 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import json
+import logging  # allow-direct-logging
 import os
 
 import httpx
@@ -198,7 +199,7 @@
 
 
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
+def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")
 
@@ -245,13 +246,17 @@
         if isinstance(compat_client, LlamaStackAsLibraryClient)
         else (httpx.HTTPStatusError, openai.AuthenticationError)
     )
-    with pytest.raises(exc_type):
-        compat_client.responses.create(
-            model=text_model_id,
-            input=case.input,
-            tools=tools,
-            stream=False,
-        )
+    # Suppress expected auth error logs only for the failing auth attempt
+    with caplog.at_level(
+        logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
+    ):
+        with pytest.raises(exc_type):
+            compat_client.responses.create(
+                model=text_model_id,
+                input=case.input,
+                tools=tools,
+                stream=False,
+            )
 
     for tool in tools:
         if tool["type"] == "mcp":
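The `caplog.at_level` pattern above is standard pytest: raising one specific logger to CRITICAL inside the context keeps its expected error output out of the test run for just the failing call, and the previous level is restored on exit. A self-contained illustration with a stand-in logger name (`demo.noisy` is hypothetical):

```python
# Minimal pytest illustration of scoped log suppression via caplog.at_level.
import logging

import pytest

noisy = logging.getLogger("demo.noisy")


def failing_call():
    noisy.error("auth failed (expected in this test)")
    raise PermissionError("401")


def test_expected_failure_is_quiet(caplog):
    # Within the block, demo.noisy only emits records at CRITICAL or above,
    # so the expected ERROR never reaches the captured output.
    with caplog.at_level(logging.CRITICAL, logger="demo.noisy"):
        with pytest.raises(PermissionError):
            failing_call()
    assert not [r for r in caplog.records if r.name == "demo.noisy"]
```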