From 7c726826b891edc2cb8b34f6d6b04893928f0a37 Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Mon, 13 Jan 2025 18:43:23 -0500
Subject: [PATCH] Fix issue when generating vLLM distros

Signed-off-by: Yuan Tang
---
 .../remote-vllm/run-with-safety.yaml       | 35 +++++--------------
 llama_stack/templates/remote-vllm/run.yaml | 23 +++---------
 llama_stack/templates/remote-vllm/vllm.py  |  2 +-
 3 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 1babd04ac..7097bc649 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -1,5 +1,6 @@
 version: '2'
 image_name: remote-vllm
+docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -7,7 +8,6 @@ apis:
 - memory
 - safety
 - telemetry
-- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -52,50 +52,33 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
-    config: {}
 metadata_store:
+  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
+  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: vllm-safety
+  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
+  provider_model_id: null
   model_type: embedding
 shields:
-- shield_id: ${env.SAFETY_MODEL}
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::memory
-  provider_id: memory-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index a3a571423..c957b05d0 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -1,5 +1,6 @@
 version: '2'
 image_name: remote-vllm
+docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -7,7 +8,6 @@ apis:
 - memory
 - safety
 - telemetry
-- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -46,39 +46,24 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
-    config: {}
 metadata_store:
+  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
+  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
+  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
-tool_groups: []
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index ecaa2cf14..8693d70d3 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
             "Inference model loaded into the vLLM server",
         ),
         "VLLM_URL": (
-            "http://host.docker.internal:5100}/v1",
+            "http://host.docker.internal:5100/v1",
             "URL of the vLLM server with the main inference model",
         ),
         "MAX_TOKENS": (
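
Note on the ${env.VAR:default} placeholders used throughout the two run YAMLs
above: when the named environment variable is set its value is substituted,
otherwise the text after the colon is used as the default (an empty default
such as ${env.BRAVE_SEARCH_API_KEY:} resolves to an empty string). The vllm.py
hunk removes a stray '}' from exactly such a default URL; if a default like
that were rendered into the ${env...} syntax of a generated run.yaml, the
brace would presumably also terminate the placeholder early. The sketch below
illustrates that substitution behavior; it is a minimal stand-in under those
assumptions, and resolve_env_vars is a hypothetical name, not the actual
llama_stack resolver.

    import os
    import re

    # Matches ${env.VAR} or ${env.VAR:default}; the default may be empty and
    # stops at the first '}', which is why a stray brace inside it is harmful.
    _ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z0-9_]+)(?::([^}]*))?\}")

    def resolve_env_vars(value: str) -> str:
        """Replace ${env.VAR:default} placeholders with environment values."""
        def _sub(match: re.Match) -> str:
            var, default = match.group(1), match.group(2)
            if var in os.environ:
                return os.environ[var]
            if default is not None:
                return default  # e.g. ${env.TAVILY_SEARCH_API_KEY:} -> ""
            raise KeyError(f"environment variable {var} is not set")
        return _ENV_PATTERN.sub(_sub, value)

    # Correct form: the whole default URL sits inside the braces.
    print(resolve_env_vars("${env.VLLM_URL:http://host.docker.internal:5100/v1}"))
    # -> http://host.docker.internal:5100/v1 (when VLLM_URL is unset)

    # Misplaced brace: the placeholder ends early, '/v1' is appended
    # literally, and an override doubles the path segment.
    os.environ["VLLM_URL"] = "http://vllm.example.com:8000/v1"
    print(resolve_env_vars("${env.VLLM_URL:http://host.docker.internal:5100}/v1"))
    # -> http://vllm.example.com:8000/v1/v1

Under that reading, the one-character change in vllm.py keeps the entire
default inside the placeholder, so both the default and any override render a
well-formed /v1 endpoint.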