Fix issue when generating vLLM distros

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
Yuan Tang 2025-01-13 18:43:23 -05:00
parent 89e3f81520
commit 7c726826b8
3 changed files with 14 additions and 46 deletions

File 1 of 3:

@@ -1,5 +1,6 @@
 version: '2'
 image_name: remote-vllm
+docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -7,7 +8,6 @@ apis:
 - memory
 - safety
 - telemetry
-- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -52,50 +52,33 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
-    config: {}
 metadata_store:
+  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
+  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: vllm-safety
+  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
+  provider_model_id: null
   model_type: embedding
 shields:
-- shield_id: ${env.SAFETY_MODEL}
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::memory
-  provider_id: memory-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
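
Note: placeholders of the form ${env.VAR:default}, seen throughout these files (for example ${env.OTEL_SERVICE_NAME:llama-stack}), fall back to the default when the variable is unset. As a minimal sketch of those semantics only, and not llama-stack's actual substitution code, a resolver might look like this:

import os
import re

# Simplified, illustrative pattern for ${env.VAR} and ${env.VAR:default}.
# Note that [^}]* stops at the first '}', which is why a stray brace in a
# default value (see the Python fix at the end of this commit) is a problem.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env_vars(value: str) -> str:
    """Replace ${env.VAR:default} placeholders with environment values."""
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        if name in os.environ:
            return os.environ[name]
        if default is not None:
            return default
        raise KeyError(f"{name} is not set and has no default")
    return _ENV_PATTERN.sub(_sub, value)

# Falls back to the default when OTEL_SERVICE_NAME is unset.
print(resolve_env_vars("${env.OTEL_SERVICE_NAME:llama-stack}"))  # llama-stack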

File 2 of 3:

@ -1,5 +1,6 @@
version: '2' version: '2'
image_name: remote-vllm image_name: remote-vllm
docker_image: null
conda_env: remote-vllm conda_env: remote-vllm
apis: apis:
- agents - agents
@ -7,7 +8,6 @@ apis:
- memory - memory
- safety - safety
- telemetry - telemetry
- tool_runtime
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -46,39 +46,24 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack} service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store: metadata_store:
namespace: null
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference provider_id: vllm-inference
provider_model_id: null
model_type: llm model_type: llm
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
model_id: all-MiniLM-L6-v2 model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers provider_id: sentence-transformers
provider_model_id: null
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] memory_banks: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []
tool_groups: []
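
A quick way to sanity-check a regenerated run config against the shape shown in these two diffs (the file path and the specific expectations here are assumptions drawn from the hunks above, not part of the commit):

import yaml  # requires PyYAML

with open("run.yaml") as f:
    config = yaml.safe_load(f)

# After this change the regenerated file should no longer carry the
# tool_runtime providers or the tool_groups registry section ...
assert "tool_runtime" not in config.get("providers", {})
assert "tool_groups" not in config
# ... while the explicit null-valued fields are emitted again.
assert config["metadata_store"]["namespace"] is None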

File 3 of 3:

@@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Inference model loaded into the vLLM server",
             ),
             "VLLM_URL": (
-                "http://host.docker.internal:5100}/v1",
+                "http://host.docker.internal:5100/v1",
                 "URL of the vLLM server with the main inference model",
             ),
             "MAX_TOKENS": (