Fix issue when generating vLLM distros

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
Yuan Tang 2025-01-13 18:43:23 -05:00
parent 89e3f81520
commit 7c726826b8
3 changed files with 14 additions and 46 deletions

File 1 of 3:

@@ -1,5 +1,6 @@
 version: '2'
 image_name: remote-vllm
+docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -7,7 +8,6 @@ apis:
 - memory
 - safety
 - telemetry
-- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -52,50 +52,33 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
-    config: {}
 metadata_store:
+  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
+  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: vllm-safety
+  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
+  provider_model_id: null
   model_type: embedding
 shields:
-- shield_id: ${env.SAFETY_MODEL}
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::memory
-  provider_id: memory-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
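
Note: placeholders of the form ${env.VAR:default}, seen throughout these files (for example ${env.OTEL_SERVICE_NAME:llama-stack}), fall back to the default when the variable is unset. As a minimal sketch of those semantics only, and not llama-stack's actual substitution code, a resolver might look like this:

import os
import re

# Simplified, illustrative pattern for ${env.VAR} and ${env.VAR:default}.
# Note that [^}]* stops at the first '}', which is why a stray brace in a
# default value (see the Python fix at the end of this commit) is a problem.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env_vars(value: str) -> str:
    """Replace ${env.VAR:default} placeholders with environment values."""
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        if name in os.environ:
            return os.environ[name]
        if default is not None:
            return default
        raise KeyError(f"{name} is not set and has no default")
    return _ENV_PATTERN.sub(_sub, value)

# Falls back to the default when OTEL_SERVICE_NAME is unset.
print(resolve_env_vars("${env.OTEL_SERVICE_NAME:llama-stack}"))  # llama-stack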

File 2 of 3:

@ -1,5 +1,6 @@
version: '2' version: '2'
image_name: remote-vllm image_name: remote-vllm
docker_image: null
conda_env: remote-vllm conda_env: remote-vllm
apis: apis:
- agents - agents
@ -7,7 +8,6 @@ apis:
- memory - memory
- safety - safety
- telemetry - telemetry
- tool_runtime
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -46,39 +46,24 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack} service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store: metadata_store:
namespace: null
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference provider_id: vllm-inference
provider_model_id: null
model_type: llm model_type: llm
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
model_id: all-MiniLM-L6-v2 model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers provider_id: sentence-transformers
provider_model_id: null
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] memory_banks: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []
tool_groups: []
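
A quick way to sanity-check a regenerated run config against the shape shown in these two diffs (the file path and the specific expectations here are assumptions drawn from the hunks above, not part of the commit):

import yaml  # requires PyYAML

with open("run.yaml") as f:
    config = yaml.safe_load(f)

# After this change the regenerated file should no longer carry the
# tool_runtime providers or the tool_groups registry section ...
assert "tool_runtime" not in config.get("providers", {})
assert "tool_groups" not in config
# ... while the explicit null-valued fields are emitted again.
assert config["metadata_store"]["namespace"] is None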

File 3 of 3:

@@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Inference model loaded into the vLLM server",
             ),
             "VLLM_URL": (
-                "http://host.docker.internal:5100}/v1",
+                "http://host.docker.internal:5100/v1",
                 "URL of the vLLM server with the main inference model",
             ),
             "MAX_TOKENS": (