mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 02:32:40 +00:00
Fix issue when generating vLLM distros
Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
parent
89e3f81520
commit
7c726826b8
3 changed files with 14 additions and 46 deletions
|
@ -1,5 +1,6 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: remote-vllm
|
image_name: remote-vllm
|
||||||
|
docker_image: null
|
||||||
conda_env: remote-vllm
|
conda_env: remote-vllm
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
@ -7,7 +8,6 @@ apis:
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm-inference
|
- provider_id: vllm-inference
|
||||||
|
@ -52,50 +52,33 @@ providers:
|
||||||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
||||||
tool_runtime:
|
|
||||||
- provider_id: brave-search
|
|
||||||
provider_type: remote::brave-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: tavily-search
|
|
||||||
provider_type: remote::tavily-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: code-interpreter
|
|
||||||
provider_type: inline::code-interpreter
|
|
||||||
config: {}
|
|
||||||
- provider_id: memory-runtime
|
|
||||||
provider_type: inline::memory-runtime
|
|
||||||
config: {}
|
|
||||||
metadata_store:
|
metadata_store:
|
||||||
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||||
models:
|
models:
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
model_id: ${env.INFERENCE_MODEL}
|
||||||
provider_id: vllm-inference
|
provider_id: vllm-inference
|
||||||
|
provider_model_id: null
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: ${env.SAFETY_MODEL}
|
model_id: ${env.SAFETY_MODEL}
|
||||||
provider_id: vllm-safety
|
provider_id: vllm-safety
|
||||||
|
provider_model_id: null
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata:
|
- metadata:
|
||||||
embedding_dimension: 384
|
embedding_dimension: 384
|
||||||
model_id: all-MiniLM-L6-v2
|
model_id: all-MiniLM-L6-v2
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
|
provider_model_id: null
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- params: null
|
||||||
|
shield_id: ${env.SAFETY_MODEL}
|
||||||
|
provider_id: null
|
||||||
|
provider_shield_id: null
|
||||||
memory_banks: []
|
memory_banks: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
tool_groups:
|
|
||||||
- toolgroup_id: builtin::websearch
|
|
||||||
provider_id: tavily-search
|
|
||||||
- toolgroup_id: builtin::memory
|
|
||||||
provider_id: memory-runtime
|
|
||||||
- toolgroup_id: builtin::code_interpreter
|
|
||||||
provider_id: code-interpreter
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
version: '2'
|
version: '2'
|
||||||
image_name: remote-vllm
|
image_name: remote-vllm
|
||||||
|
docker_image: null
|
||||||
conda_env: remote-vllm
|
conda_env: remote-vllm
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
@ -7,7 +8,6 @@ apis:
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm-inference
|
- provider_id: vllm-inference
|
||||||
|
@ -46,39 +46,24 @@ providers:
|
||||||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
||||||
tool_runtime:
|
|
||||||
- provider_id: brave-search
|
|
||||||
provider_type: remote::brave-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: tavily-search
|
|
||||||
provider_type: remote::tavily-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: code-interpreter
|
|
||||||
provider_type: inline::code-interpreter
|
|
||||||
config: {}
|
|
||||||
- provider_id: memory-runtime
|
|
||||||
provider_type: inline::memory-runtime
|
|
||||||
config: {}
|
|
||||||
metadata_store:
|
metadata_store:
|
||||||
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||||
models:
|
models:
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
model_id: ${env.INFERENCE_MODEL}
|
||||||
provider_id: vllm-inference
|
provider_id: vllm-inference
|
||||||
|
provider_model_id: null
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata:
|
- metadata:
|
||||||
embedding_dimension: 384
|
embedding_dimension: 384
|
||||||
model_id: all-MiniLM-L6-v2
|
model_id: all-MiniLM-L6-v2
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
|
provider_model_id: null
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
memory_banks: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
tool_groups: []
|
|
||||||
|
|
|
@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"Inference model loaded into the vLLM server",
|
"Inference model loaded into the vLLM server",
|
||||||
),
|
),
|
||||||
"VLLM_URL": (
|
"VLLM_URL": (
|
||||||
"http://host.docker.internal:5100}/v1",
|
"http://host.docker.internal:5100/v1",
|
||||||
"URL of the vLLM server with the main inference model",
|
"URL of the vLLM server with the main inference model",
|
||||||
),
|
),
|
||||||
"MAX_TOKENS": (
|
"MAX_TOKENS": (
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue