diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index cf53459b9..8268a0085 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -10,6 +10,7 @@ on:
       - 'llama_stack/distribution/build.*'
       - 'llama_stack/distribution/*.sh'
       - '.github/workflows/providers-build.yml'
+      - 'llama_stack/templates/**'
   pull_request:
     paths:
       - 'llama_stack/cli/stack/build.py'
@@ -17,6 +18,7 @@ on:
       - 'llama_stack/distribution/build.*'
       - 'llama_stack/distribution/*.sh'
       - '.github/workflows/providers-build.yml'
+      - 'llama_stack/templates/**'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml
index fe3f4d8a6..5fd3cc3f5 100644
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@@ -11,7 +11,7 @@ distribution_spec:
     - remote::gemini
     - remote::groq
     - remote::sambanova
-    - remote::remote-vllm
+    - remote::vllm
     - inline::sentence-transformers
     vector_io:
     - inline::sqlite-vec
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index 26f2e0f0d..4732afa77 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -48,8 +48,8 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:}
-  - provider_id: remote-vllm
-    provider_type: remote::remote-vllm
+  - provider_id: vllm
+    provider_type: remote::vllm
     config:
       url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py
index ff9bee160..650ecc87f 100644
--- a/llama_stack/templates/starter/starter.py
+++ b/llama_stack/templates/starter/starter.py
@@ -108,7 +108,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
             SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
         ),
         (
-            "remote-vllm",
+            "vllm",
             [],
             VLLMInferenceAdapterConfig.sample_run_config(
                 url="${env.VLLM_URL:http://localhost:8000/v1}",