From 0cd98c957e3247104e945b65e0aee9e503db6d8e Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 25 Nov 2025 12:56:57 -0500 Subject: [PATCH] chore: rename run.yaml to config.yaml Since we only have one config, let's call it config.yaml! This should be treated as the source of truth for starting a stack. Change all file names, tests, etc. accordingly. Signed-off-by: Charlie Doern --- .github/workflows/README.md | 2 +- .github/workflows/backward-compat.yml | 52 +++++++++---------- .github/workflows/integration-auth-tests.yml | 18 +++---- .github/workflows/providers-build.yml | 4 +- .github/workflows/providers-list-deps.yml | 2 +- .../test-external-provider-module.yml | 6 +-- CHANGELOG.md | 2 +- docs/docs/advanced_apis/evaluation.mdx | 2 +- docs/docs/building_applications/rag.mdx | 2 +- docs/docs/building_applications/tools.mdx | 2 +- docs/docs/concepts/evaluation_concepts.mdx | 2 +- docs/docs/contributing/new_api_provider.mdx | 2 +- docs/docs/deploying/kubernetes_deployment.mdx | 6 +-- docs/docs/distributions/building_distro.mdx | 10 ++-- docs/docs/distributions/configuration.mdx | 4 +- .../distributions/customizing_run_yaml.mdx | 18 +++---- .../distributions/importing_as_library.mdx | 2 +- docs/docs/distributions/index.mdx | 2 +- .../remote_hosted_distro/watsonx.md | 4 +- .../self_hosted_distro/dell-tgi.md | 6 +-- .../distributions/self_hosted_distro/dell.md | 4 +- .../self_hosted_distro/meta-reference-gpu.md | 12 ++--- .../self_hosted_distro/nvidia.md | 12 ++--- .../getting_started/detailed_tutorial.mdx | 2 +- .../external/external-providers-guide.mdx | 2 +- .../openai_file_operations_quick_reference.md | 2 +- .../openai_responses_limitations.mdx | 4 +- .../docs/references/evals_reference/index.mdx | 4 +- src/llama_stack/cli/stack/list_stacks.py | 8 +-- src/llama_stack/cli/stack/utils.py | 6 +-- src/llama_stack/core/routers/__init__.py | 2 +- src/llama_stack/core/routing_tables/models.py | 2 +- src/llama_stack/core/stack.py | 2 +- .../ci-tests/{run.yaml => config.yaml} | 0 .../dell/{run.yaml => config.yaml} | 0 src/llama_stack/distributions/dell/dell.py | 2 +- .../distributions/dell/doc_template.md | 12 ++--- .../{run.yaml => config.yaml} | 0 .../meta-reference-gpu/doc_template.md | 10 ++-- .../meta-reference-gpu/meta_reference.py | 2 +- .../nvidia/{run.yaml => config.yaml} | 0 .../distributions/nvidia/doc_template.md | 10 ++-- .../distributions/nvidia/nvidia.py | 2 +- .../oci/{run.yaml => config.yaml} | 0 src/llama_stack/distributions/oci/oci.py | 2 +- .../open-benchmark/{run.yaml => config.yaml} | 0 .../open-benchmark/open_benchmark.py | 2 +- .../postgres-demo/{run.yaml => config.yaml} | 0 .../starter-gpu/{run.yaml => config.yaml} | 0 .../starter/{run.yaml => config.yaml} | 0 .../distributions/starter/starter.py | 2 +- .../watsonx/{run.yaml => config.yaml} | 0 .../distributions/watsonx/watsonx.py | 2 +- src/llama_stack/log.py | 4 +- .../inline/inference/meta_reference/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 4 +- .../providers/remote/inference/vllm/vllm.py | 2 +- tests/README.md | 2 +- tests/backward_compat/test_run_config.py | 8 +-- tests/integration/README.md | 2 +- tests/integration/conftest.py | 2 +- .../test_persistence_integration.py | 4 +- tests/unit/cli/test_stack_config.py | 2 +- tests/unit/distribution/test_stack_list.py | 4 +- 64 files changed, 147 insertions(+), 145 deletions(-) rename src/llama_stack/distributions/ci-tests/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/dell/{run.yaml => config.yaml} (100%) rename
src/llama_stack/distributions/meta-reference-gpu/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/nvidia/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/oci/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/open-benchmark/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/postgres-demo/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/starter-gpu/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/starter/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/watsonx/{run.yaml => config.yaml} (100%) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index bb848209f..18f9a88bf 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -4,7 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl | Name | File | Purpose | | ---- | ---- | ------- | -| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs | +| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for config.yaml configs | | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md | | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. | | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script | diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 9f950a8b9..c3e5b1f59 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -1,6 +1,6 @@ name: Backward Compatibility Check -run-name: Check backward compatibility for run.yaml configs +run-name: Check backward compatibility for config.yaml configs on: pull_request: @@ -12,7 +12,7 @@ on: paths: - 'src/llama_stack/core/datatypes.py' - 'src/llama_stack/providers/datatypes.py' - - 'src/llama_stack/distributions/**/run.yaml' + - 'src/llama_stack/distributions/**/config.yaml' - 'tests/backward_compat/**' - '.github/workflows/backward-compat.yml' @@ -45,15 +45,15 @@ jobs: run: | uv sync --group dev - - name: Extract run.yaml files from main branch + - name: Extract config.yaml files from main branch id: extract_configs run: | - # Get list of run.yaml paths from main + # Get list of config.yaml paths from main git fetch origin main - CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true) + CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/config.yaml$" || true) if [ -z "$CONFIG_PATHS" ]; then - echo "No run.yaml files found in main branch" + echo "No config.yaml files found in main branch" exit 1 fi @@ -125,7 +125,7 @@ jobs: echo "" echo "⚠️ WARNING: Breaking changes detected but acknowledged" echo "" - echo "This PR introduces backward-incompatible changes to run.yaml." + echo "This PR introduces backward-incompatible changes to config.yaml." echo "The changes have been properly acknowledged." echo "" exit 0 # Pass the check @@ -133,7 +133,7 @@ jobs: echo "" echo "❌ ERROR: Breaking changes detected without acknowledgment" echo "" - echo "This PR introduces backward-incompatible changes to run.yaml" + echo "This PR introduces backward-incompatible changes to config.yaml" echo "that will break existing user configurations." 
echo "" echo "To acknowledge this breaking change, do ONE of:" @@ -155,11 +155,11 @@ jobs: with: fetch-depth: 0 - - name: Extract ci-tests run.yaml from main + - name: Extract ci-tests config.yaml from main run: | git fetch origin main - git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml - echo "Extracted ci-tests run.yaml from main branch" + git show origin/main:src/llama_stack/distributions/ci-tests/config.yaml > /tmp/main-ci-tests-config.yaml + echo "Extracted ci-tests config.yaml from main branch" - name: Setup test environment uses: ./.github/actions/setup-test-environment @@ -175,7 +175,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/main-ci-tests-run.yaml + stack-config: /tmp/main-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -258,21 +258,21 @@ jobs: env: GH_TOKEN: ${{ github.token }} - - name: Extract ci-tests run.yaml from release + - name: Extract ci-tests config.yaml from release if: steps.get_release.outputs.has_release == 'true' id: extract_config run: | RELEASE_TAG="${{ steps.get_release.outputs.tag }}" # Try with src/ prefix first (newer releases), then without (older releases) - if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then - echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)" + if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then + echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (src/ path)" echo "has_config=true" >> $GITHUB_OUTPUT - elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then - echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)" + elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then + echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (old path)" echo "has_config=true" >> $GITHUB_OUTPUT else - echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG" + echo "::warning::ci-tests/config.yaml not found in release $RELEASE_TAG" echo "has_config=false" >> $GITHUB_OUTPUT fi @@ -292,7 +292,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/release-ci-tests-run.yaml + stack-config: /tmp/release-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -318,7 +318,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/release-ci-tests-run.yaml + stack-config: /tmp/release-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -447,11 +447,11 @@ jobs: run: | RELEASE_TAG="${{ steps.get_release.outputs.tag }}" - # Get run.yaml files from the release (try both src/ and old path) - CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true) + # Get config.yaml files from the release (try both src/ and old path) + CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/config.yaml$" || true) if [ -z "$CONFIG_PATHS" ]; then - echo "::warning::No run.yaml files found in release $RELEASE_TAG" + echo "::warning::No config.yaml files found in release $RELEASE_TAG" echo "has_configs=false" >> $GITHUB_OUTPUT exit 0 fi @@ -523,7 
+523,7 @@ jobs: ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release. - Users on release \`$RELEASE_TAG\` will not be able to upgrade - - Existing run.yaml configurations will fail validation + - Existing config.yaml configurations will fail validation The tests pass on \`main\` but fail with this PR's changes. @@ -543,7 +543,7 @@ jobs: - Tests **PASS** on main branch ✅ - Tests **FAIL** on PR branch ❌ - Users on release \`$RELEASE_TAG\` will not be able to upgrade - - Existing run.yaml configurations will fail validation + - Existing config.yaml configurations will fail validation > **Note:** This is informational only and does not block merge. > Consider whether this breaking change is acceptable for users. @@ -570,7 +570,7 @@ jobs: cat >> $GITHUB_STEP_SUMMARY < $run_dir/run.yaml + cat <<'EOF' > $run_dir/config.yaml version: '2' image_name: kube apis: [] @@ -101,17 +101,17 @@ jobs: server: port: 8321 EOF - yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml - cat $run_dir/run.yaml + yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/config.yaml + cat $run_dir/config.yaml # avoid line breaks in the server log, especially because we grep it below. 
export LLAMA_STACK_LOG_WIDTH=200 - nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 & + nohup uv run llama stack run $run_dir/config.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index f2559a258..02a2fb001 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -116,7 +116,7 @@ jobs: BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" - BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" fi @@ -162,7 +162,7 @@ jobs: BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" - BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" fi diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index a2e8a87c9..02497c15e 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -102,4 +102,4 @@ jobs: USE_COPY_NOT_MOUNT: "true" LLAMA_STACK_DIR: "." 
run: | - uv run llama stack list-deps src/llama_stack/distributions/ci-tests/run.yaml + uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index 39f2356aa..3d4e924af 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -43,8 +43,8 @@ jobs: nohup ramalama serve llama3.2:3b-instruct-fp16 > ramalama_server.log 2>&1 & - name: Apply image type to config file run: | - yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml - cat tests/external/ramalama-stack/run.yaml + yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/config.yaml + cat tests/external/ramalama-stack/config.yaml - name: Install distribution dependencies run: | @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ramalama-stack-test/bin/activate uv pip list - nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 & + nohup llama stack run tests/external/ramalama-stack/config.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index c51a1b2aa..bba04fa11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,7 +85,7 @@ Published on: 2025-07-28T23:35:23Z ## Highlights * Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc. -* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details. +* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [config.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/config.yaml) for more details. * All tests migrated to pytest now (thanks @Elbehery) * DPO implementation in the post-training provider (thanks @Nehanth) * (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back. 
diff --git a/docs/docs/advanced_apis/evaluation.mdx b/docs/docs/advanced_apis/evaluation.mdx index 1efaa4c5c..0d5be4184 100644 --- a/docs/docs/advanced_apis/evaluation.mdx +++ b/docs/docs/advanced_apis/evaluation.mdx @@ -96,7 +96,7 @@ We have built-in functionality to run the supported open-benchmarks using llama- Spin up llama stack server with 'open-benchmark' template ``` -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index b1681dc62..4cddbe2bf 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -85,7 +85,7 @@ Llama Stack provides OpenAI-compatible RAG capabilities through: ## Configuring Default Embedding Models -To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so: +To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your config.yaml like so: ```yaml vector_stores: diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index f7b913fef..e6fe14f50 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -85,7 +85,7 @@ Features: - Context retrieval with token limits :::note[Default Configuration] -By default, llama stack run.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers. +By default, llama stack config.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers. ::: ## Model Context Protocol (MCP) diff --git a/docs/docs/concepts/evaluation_concepts.mdx b/docs/docs/concepts/evaluation_concepts.mdx index c7a13fd70..42a7ce336 100644 --- a/docs/docs/concepts/evaluation_concepts.mdx +++ b/docs/docs/concepts/evaluation_concepts.mdx @@ -47,7 +47,7 @@ We have built-in functionality to run the supported open-benckmarks using llama- Spin up llama stack server with 'open-benchmark' template ```bash -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` #### Run eval CLI diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index 2efaf08b4..b2e6b4d18 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -14,7 +14,7 @@ This guide will walk you through the process of adding a new API provider to Lla - Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.) - Determine the provider type ([Remote](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote) or [Inline](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline)). Remote providers make requests to external services, while inline providers execute implementation locally. - Add your provider to the appropriate [Registry](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/registry/). Specify pip dependencies necessary. 
-- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `run.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. +- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. Here are some example PRs to help you get started: diff --git a/docs/docs/deploying/kubernetes_deployment.mdx b/docs/docs/deploying/kubernetes_deployment.mdx index 48d08f0db..bb04da033 100644 --- a/docs/docs/deploying/kubernetes_deployment.mdx +++ b/docs/docs/deploying/kubernetes_deployment.mdx @@ -133,7 +133,7 @@ For more information about the operator, see the [llama-stack-k8s-operator repos ### Step 4: Deploy Llama Stack Server using Operator Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources. -You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)). +You can optionally override the default `config.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)). ```yaml cat < ProviderSpec: [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module. -To install Llama Stack with this external provider a user can provider the following run.yaml: +To install Llama Stack with this external provider a user can provide the following config.yaml: ```yaml version: 2 diff --git a/docs/docs/providers/files/openai_file_operations_quick_reference.md b/docs/docs/providers/files/openai_file_operations_quick_reference.md index 43e2318e2..c07bc5f9a 100644 --- a/docs/docs/providers/files/openai_file_operations_quick_reference.md +++ b/docs/docs/providers/files/openai_file_operations_quick_reference.md @@ -51,7 +51,7 @@ results = await client.vector_stores.search( > **Note**: For detailed configuration examples and options, see [Configuration Examples](../openai_file_operations_support.md#configuration-examples) in the full documentation.
-**Basic Setup**: Configure vector_io and files providers in your run.yaml +**Basic Setup**: Configure vector_io and files providers in your config.yaml ## Common Use Cases diff --git a/docs/docs/providers/openai_responses_limitations.mdx b/docs/docs/providers/openai_responses_limitations.mdx index 19007438e..6aaf07b8b 100644 --- a/docs/docs/providers/openai_responses_limitations.mdx +++ b/docs/docs/providers/openai_responses_limitations.mdx @@ -123,7 +123,7 @@ Connectors are MCP servers maintained and managed by the Responses API provider. **Open Questions:** - Should Llama Stack include built-in support for some, all, or none of OpenAI's connectors? -- Should there be a mechanism for administrators to add custom connectors via `run.yaml` or an API? +- Should there be a mechanism for administrators to add custom connectors via `config.yaml` or an API? --- @@ -210,7 +210,7 @@ Metadata allows you to attach additional information to a response for your own **Status:** Feature Request -When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `run.yaml` or an administrative API. +When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `config.yaml` or an administrative API. --- diff --git a/docs/docs/references/evals_reference/index.mdx b/docs/docs/references/evals_reference/index.mdx index 0ec555e66..85b2d9621 100644 --- a/docs/docs/references/evals_reference/index.mdx +++ b/docs/docs/references/evals_reference/index.mdx @@ -355,7 +355,7 @@ The purpose of scoring function is to calculate the score for each example based Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe. ### Add new benchmark into template -Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml) +Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/config.yaml) Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. 
To add the new benchmark, you need to have - `benchmark_id`: identifier of the benchmark @@ -366,7 +366,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar Spin up llama stack server with 'open-benchmark' templates ```bash -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` Run eval benchmark CLI with your new benchmark id diff --git a/src/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py index ae59ba911..0153b3391 100644 --- a/src/llama_stack/cli/stack/list_stacks.py +++ b/src/llama_stack/cli/stack/list_stacks.py @@ -64,14 +64,14 @@ class StackListBuilds(Subcommand): for name, (path, source_type) in sorted(distributions.items()): row = [name, source_type, str(path)] # Check for build and run config files - # For built-in distributions, configs are named build.yaml and run.yaml - # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml + # For built-in distributions, configs are named build.yaml and config.yaml + # For custom distributions, configs are named {name}-build.yaml and {name}-config.yaml if source_type == "built-in": build_config = "Yes" if (path / "build.yaml").exists() else "No" - run_config = "Yes" if (path / "run.yaml").exists() else "No" + run_config = "Yes" if (path / "config.yaml").exists() else "No" else: build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" - run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" + run_config = "Yes" if (path / f"{name}-config.yaml").exists() else "No" row.extend([build_config, run_config]) rows.append(row) print_table(rows, headers, separate_rows=True) diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index cb4c754d9..e02448e04 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -57,7 +57,7 @@ def generate_run_config( image_name: str, ) -> Path: """ - Generate a run.yaml template file for user to edit from a build.yaml file + Generate a config.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) distro_dir = DISTRIBS_BASE_DIR / image_name @@ -123,7 +123,7 @@ def generate_run_config( ) run_config.providers[api].append(p_spec) - run_config_file = build_dir / f"{image_name}-run.yaml" + run_config_file = build_dir / f"{image_name}-config.yaml" with open(run_config_file, "w") as f: to_write = json.loads(run_config.model_dump_json()) @@ -131,7 +131,7 @@ def generate_run_config( # Only print this message for non-container builds since it will be displayed before the # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it + # For non-container builds, the config.yaml is generated at the very end of the build process so it # makes sense to display this message if build_config.image_type != LlamaStackImageType.CONTAINER.value: cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 1b7fe3556..a4327563f 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -9,8 +9,8 @@ from typing import Any from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, + StackConfig, ) -from llama_stack.core.datatypes import StackConfig 
from llama_stack.core.store import DistributionRegistry from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack_api import Api, RoutingTable diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 1facbb27b..bc96db985 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -224,7 +224,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): existing_models = await self.get_all_with_type("model") # we may have an alias for the model registered by the user (or during initialization - # from run.yaml) that we need to keep track of + # from config.yaml) that we need to keep track of model_ids = {} for model in existing_models: if model.provider_id != provider_id: diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 554fae303..96f9eb8b9 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -500,7 +500,7 @@ async def refresh_registry_task(impls: dict[Api, Any]): def get_stack_run_config_from_distro(distro: str) -> StackConfig: - distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml" + distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml" with importlib.resources.as_file(distro_path) as path: if not path.exists(): diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/config.yaml similarity index 100% rename from src/llama_stack/distributions/ci-tests/run.yaml rename to src/llama_stack/distributions/ci-tests/config.yaml diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/config.yaml similarity index 100% rename from src/llama_stack/distributions/dell/run.yaml rename to src/llama_stack/distributions/dell/config.yaml diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py index 52a07b7f1..50da2bd70 100644 --- a/src/llama_stack/distributions/dell/dell.py +++ b/src/llama_stack/distributions/dell/dell.py @@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider, embedding_provider], "vector_io": [chromadb_provider], diff --git a/src/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md index 1530f665a..c3730360f 100644 --- a/src/llama_stack/distributions/dell/doc_template.md +++ b/src/llama_stack/distributions/dell/doc_template.md @@ -141,14 +141,14 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ - -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \ -e INFERENCE_MODEL=$INFERENCE_MODEL \ -e DEH_URL=$DEH_URL \ -e SAFETY_MODEL=$SAFETY_MODEL \ -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ + --config /root/my-config.yaml \ --port $LLAMA_STACK_PORT ``` @@ -157,16 +157,16 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set 
the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml docker run -it \ --pull always \ --network host \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ -e INFERENCE_MODEL=$INFERENCE_MODEL \ -e DEH_URL=$DEH_URL \ -e CHROMA_URL=$CHROMA_URL \ diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/config.yaml similarity index 100% rename from src/llama_stack/distributions/meta-reference-gpu/run.yaml rename to src/llama_stack/distributions/meta-reference-gpu/config.yaml diff --git a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md index af71d8388..5f4caa964 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md +++ b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md @@ -73,8 +73,8 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml LLAMA_STACK_PORT=8321 docker run \ @@ -83,8 +83,8 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT ``` @@ -105,7 +105,7 @@ Make sure you have the Llama Stack CLI available. 
```bash llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ -llama stack run distributions/{{ name }}/run.yaml \ +llama stack run distributions/{{ name }}/config.yaml \ --port 8321 ``` diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index a515794d5..83aba6b82 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider, embedding_provider], "vector_io": [vector_io_provider], diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/config.yaml similarity index 100% rename from src/llama_stack/distributions/nvidia/run.yaml rename to src/llama_stack/distributions/nvidia/config.yaml diff --git a/src/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md index 054a1e3ec..7152ee268 100644 --- a/src/llama_stack/distributions/nvidia/doc_template.md +++ b/src/llama_stack/distributions/nvidia/doc_template.md @@ -128,8 +128,8 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml LLAMA_STACK_PORT=8321 docker run \ @@ -137,8 +137,8 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT @@ -162,7 +162,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack list-deps nvidia | xargs -L1 uv pip install NVIDIA_API_KEY=$NVIDIA_API_KEY \ INFERENCE_MODEL=$INFERENCE_MODEL \ -llama stack run ./run.yaml \ +llama stack run ./config.yaml \ --port 8321 ``` diff --git a/src/llama_stack/distributions/nvidia/nvidia.py b/src/llama_stack/distributions/nvidia/nvidia.py index a92a2e6f8..6959a210d 100644 --- a/src/llama_stack/distributions/nvidia/nvidia.py +++ b/src/llama_stack/distributions/nvidia/nvidia.py @@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "datasetio": [datasetio_provider], diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/config.yaml similarity index 100% rename from src/llama_stack/distributions/oci/run.yaml rename to src/llama_stack/distributions/oci/config.yaml diff --git a/src/llama_stack/distributions/oci/oci.py b/src/llama_stack/distributions/oci/oci.py index 1f21840f1..338dd3661 100644 --- a/src/llama_stack/distributions/oci/oci.py +++ 
b/src/llama_stack/distributions/oci/oci.py @@ -74,7 +74,7 @@ def get_distribution_template(name: str = "oci") -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "vector_io": [vector_io_provider], diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/config.yaml similarity index 100% rename from src/llama_stack/distributions/open-benchmark/run.yaml rename to src/llama_stack/distributions/open-benchmark/config.yaml diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py index 1f4dbf2c2..7d79231dd 100644 --- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -261,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate: providers=providers, available_models_by_provider=available_models, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": inference_providers, "vector_io": vector_io_providers, diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/config.yaml similarity index 100% rename from src/llama_stack/distributions/postgres-demo/run.yaml rename to src/llama_stack/distributions/postgres-demo/config.yaml diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/config.yaml similarity index 100% rename from src/llama_stack/distributions/starter-gpu/run.yaml rename to src/llama_stack/distributions/starter-gpu/config.yaml diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/config.yaml similarity index 100% rename from src/llama_stack/distributions/starter/run.yaml rename to src/llama_stack/distributions/starter/config.yaml diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 8a0efdf1f..1576721ab 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -276,7 +276,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: template_path=None, providers=providers, run_configs={ - "run.yaml": base_run_settings, + "config.yaml": base_run_settings, "run-with-postgres-store.yaml": postgres_run_settings, }, run_config_env_vars={ diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/config.yaml similarity index 100% rename from src/llama_stack/distributions/watsonx/run.yaml rename to src/llama_stack/distributions/watsonx/config.yaml diff --git a/src/llama_stack/distributions/watsonx/watsonx.py b/src/llama_stack/distributions/watsonx/watsonx.py index d79aea872..edc011a6c 100644 --- a/src/llama_stack/distributions/watsonx/watsonx.py +++ b/src/llama_stack/distributions/watsonx/watsonx.py @@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: template_path=None, providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "files": [files_provider], diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py index 
c11c2c06f..0bc59deaf 100644 --- a/src/llama_stack/log.py +++ b/src/llama_stack/log.py @@ -92,10 +92,10 @@ def config_to_category_levels(category: str, level: str): def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]: """ - Helper function to parse a yaml logging configuration found in the run.yaml + Helper function to parse a yaml logging configuration found in the config.yaml Parameters: - yaml_config (Logging): the logger config object found in the run.yaml + yaml_config (Logging): the logger config object found in the config.yaml Returns: Dict[str, int]: A dictionary mapping categories to their log levels. diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py index ec6e8bfe8..189133a4b 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py @@ -16,7 +16,7 @@ class MetaReferenceInferenceConfig(BaseModel): # this is a placeholder to indicate inference model id # the actual inference model id is dtermined by the moddel id in the request # Note: you need to register the model before using it for inference - # models in the resouce list in the run.yaml config will be registered automatically + # models in the resouce list in the config.yaml config will be registered automatically model: str | None = None torch_seed: int | None = None max_seq_len: int = 4096 diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 5dc8c33f7..976df921b 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -51,7 +51,9 @@ class _HfAdapter(OpenAIMixin): class TGIAdapter(_HfAdapter): async def initialize(self, config: TGIImplConfig) -> None: if not config.base_url: - raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") + raise ValueError( + "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI." + ) log.info(f"Initializing TGI client with url={config.base_url}") # Extract base URL without /v1 for HF client initialization base_url_str = str(config.base_url).rstrip("/") diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 6664ca36b..45d9176aa 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -46,7 +46,7 @@ class VLLMInferenceAdapter(OpenAIMixin): async def initialize(self) -> None: if not self.config.base_url: raise ValueError( - "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." + "You must provide a URL in config.yaml (or via the VLLM_URL environment variable) to use vLLM." ) async def health(self) -> HealthResponse: diff --git a/tests/README.md b/tests/README.md index c00829d3e..5cf9d95af 100644 --- a/tests/README.md +++ b/tests/README.md @@ -35,7 +35,7 @@ For running integration tests, you must provide a few things: - **`server:`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running. 
- **`server::`** - same as above but with a custom port (e.g., `server:starter:8322`) - a URL which points to a Llama Stack distribution server - - a distribution name (e.g., `starter`) or a path to a `run.yaml` file + - a distribution name (e.g., `starter`) or a path to a `config.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - Any API keys you need to use should be set in the environment, or can be passed in with the --env option. diff --git a/tests/backward_compat/test_run_config.py b/tests/backward_compat/test_run_config.py index ccc18c84f..bd832df10 100644 --- a/tests/backward_compat/test_run_config.py +++ b/tests/backward_compat/test_run_config.py @@ -5,10 +5,10 @@ # the root directory of this source tree. """ -Backward compatibility test for run.yaml files. +Backward compatibility test for config.yaml files. This test ensures that changes to StackRunConfig don't break -existing run.yaml files from previous versions. +existing config.yaml files from previous versions. """ import os @@ -36,10 +36,10 @@ def get_test_configs(): else: # Local mode: test current distribution configs repo_root = Path(__file__).parent.parent.parent - config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/run.yaml")) + config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/config.yaml")) if not config_files: - pytest.skip("No run.yaml files found in distributions/") + pytest.skip("No config.yaml files found in distributions/") return config_files diff --git a/tests/integration/README.md b/tests/integration/README.md index 3559b785c..48f511261 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -25,7 +25,7 @@ Here are the most important options: - **`server:`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running. - **`server::`** - same as above but with a custom port (e.g., `server:starter:8322`) - a URL which points to a Llama Stack distribution server - - a distribution name (e.g., `starter`) or a path to a `run.yaml` file + - a distribution name (e.g., `starter`) or a path to a `config.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=ollama,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers. diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 0d0af687f..9854eedc6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -151,7 +151,7 @@ def pytest_addoption(parser): """ a 'pointer' to the stack. this can be either be: (a) a template name like `starter`, or - (b) a path to a run.yaml file, or + (b) a path to a config.yaml file, or (c) an adhoc config spec, e.g. 
`inference=fireworks,safety=llama-guard,agents=meta-reference`, or (d) a server config like `server:ci-tests`, or (e) a docker config like `docker:ci-tests` (builds and runs container) diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py index ff42b451f..1de737db2 100644 --- a/tests/integration/test_persistence_integration.py +++ b/tests/integration/test_persistence_integration.py @@ -17,7 +17,7 @@ from llama_stack.core.storage.datatypes import ( def test_starter_distribution_config_loads_and_resolves(): """Integration: Actual starter config should parse and have correct storage structure.""" - with open("llama_stack/distributions/starter/run.yaml") as f: + with open("llama_stack/distributions/starter/config.yaml") as f: config_dict = yaml.safe_load(f) config = StackConfig(**config_dict) @@ -47,7 +47,7 @@ def test_starter_distribution_config_loads_and_resolves(): def test_postgres_demo_distribution_config_loads(): """Integration: Postgres demo should use Postgres backend for all stores.""" - with open("llama_stack/distributions/postgres-demo/run.yaml") as f: + with open("llama_stack/distributions/postgres-demo/config.yaml") as f: config_dict = yaml.safe_load(f) config = StackConfig(**config_dict) diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index 6aefac003..a82d30805 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -302,7 +302,7 @@ def test_providers_flag_generates_config_with_api_keys(): # Read the generated config file from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR - config_file = DISTRIBS_BASE_DIR / "providers-run" / "run.yaml" + config_file = DISTRIBS_BASE_DIR / "providers-run" / "config.yaml" with open(config_file) as f: config_dict = yaml.safe_load(f) diff --git a/tests/unit/distribution/test_stack_list.py b/tests/unit/distribution/test_stack_list.py index 725ce3410..7a51ee7e6 100644 --- a/tests/unit/distribution/test_stack_list.py +++ b/tests/unit/distribution/test_stack_list.py @@ -32,7 +32,7 @@ def mock_distribs_base_dir(tmp_path): starter_custom = custom_dir / "starter" starter_custom.mkdir() (starter_custom / "starter-build.yaml").write_text("# build config") - (starter_custom / "starter-run.yaml").write_text("# run config") + (starter_custom / "starter-config.yaml").write_text("# run config") return custom_dir @@ -48,7 +48,7 @@ def mock_distro_dir(tmp_path): distro_path = distro_dir / distro_name distro_path.mkdir() (distro_path / "build.yaml").write_text("# build config") - (distro_path / "run.yaml").write_text("# run config") + (distro_path / "config.yaml").write_text("# run config") return distro_dir