Update all templates to include toolgroups and tool runtime

This commit is contained in:
Dinesh Yeduguru 2025-01-08 15:28:03 -08:00
parent 67b35613bb
commit edcfd66be3
55 changed files with 854 additions and 145 deletions

View file

@ -2,7 +2,6 @@ version: '2'
name: remote-vllm
distribution_spec:
description: Use (an external) vLLM server for running LLM inference
docker_image: null
providers:
inference:
- remote::vllm
@ -16,4 +15,9 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
@ -8,6 +7,7 @@ apis:
- memory
- safety
- telemetry
- tool_runtime
providers:
inference:
- provider_id: vllm-inference
@ -52,33 +52,50 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
# Tool runtime providers. The tool_groups section of this file binds
# toolgroups to these entries by provider_id.
tool_runtime:
# Configured, but no tool group in this file references it:
# builtin::websearch is bound to tavily-search instead.
- provider_id: brave-search
provider_type: remote::brave-search
config:
# NOTE(review): the empty default after ':' presumably allows start-up
# without a key; confirm how the provider behaves when it is blank.
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
# Provider bound to builtin::websearch in tool_groups.
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
# Provider bound to builtin::code_interpreter in tool_groups.
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
# Provider bound to builtin::memory in tool_groups.
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: vllm-safety
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
@ -8,6 +7,7 @@ apis:
- memory
- safety
- telemetry
- tool_runtime
providers:
inference:
- provider_id: vllm-inference
@ -46,24 +46,39 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
# Tool runtime providers: two remote web-search backends and two inline
# runtimes (code interpreter and memory).
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
# NOTE(review): the empty default after ':' presumably allows start-up
# without a key; confirm how the provider behaves when it is blank.
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
# The inline providers take no configuration here.
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
# Bind the builtin toolgroups to the tool_runtime providers declared in this
# file, mirroring the safety variant of this distribution. With an empty list
# no toolgroup would be bound to any of those providers.
# NOTE(review): this file appears to be generated from the distribution
# template; pass default_tool_groups to the matching run config there too,
# otherwise regeneration will reset this list.
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::memory
  provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter

View file

@ -7,8 +7,12 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -24,6 +28,12 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "remote-vllm"
inference_provider = Provider(
@ -60,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
# Builtin toolgroups registered by default, each paired with the provider_id
# of the tool_runtime provider it is bound to.
default_tool_groups = [
    ToolGroupInput(toolgroup_id=group_id, provider_id=runtime_id)
    for group_id, runtime_id in (
        ("builtin::websearch", "tavily-search"),
        ("builtin::memory", "memory-runtime"),
        ("builtin::code_interpreter", "code-interpreter"),
    )
]
return DistributionTemplate(
name=name,
@ -97,6 +121,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={