mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-15 01:26:10 +00:00
agents to use tools api (#673)
# What does this PR do? PR #639 introduced the notion of Tools API and ability to invoke tools through API just as any resource. This PR changes the Agents to start using the Tools API to invoke tools. Major changes include: 1) Ability to specify tool groups with AgentConfig 2) Agent gets the corresponding tool definitions for the specified tools and pass along to the model 3) Attachements are now named as Documents and their behavior is mostly unchanged from user perspective 4) You can specify args that can be injected to a tool call through Agent config. This is especially useful in case of memory tool, where you want the tool to operate on a specific memory bank. 5) You can also register tool groups with args, which lets the agent inject these as well into the tool call. 6) All tests have been migrated to use new tools API and fixtures including client SDK tests 7) Telemetry just works with tools API because of our trace protocol decorator ## Test Plan ``` pytest -s -v -k fireworks llama_stack/providers/tests/agents/test_agents.py \ --safety-shield=meta-llama/Llama-Guard-3-8B \ --inference-model=meta-llama/Llama-3.1-8B-Instruct pytest -s -v -k together llama_stack/providers/tests/tools/test_tools.py \ --safety-shield=meta-llama/Llama-Guard-3-8B \ --inference-model=meta-llama/Llama-3.1-8B-Instruct LLAMA_STACK_CONFIG="/Users/dineshyv/.llama/distributions/llamastack-together/together-run.yaml" pytest -v tests/client-sdk/agents/test_agents.py ``` run.yaml: https://gist.github.com/dineshyv/0365845ad325e1c2cab755788ccc5994 Notebook: https://colab.research.google.com/drive/1ck7hXQxRl6UvT-ijNRZ-gMZxH1G3cN2d?usp=sharing
This commit is contained in:
parent
596afc6497
commit
a5c57cd381
116 changed files with 4959 additions and 2778 deletions
|
@ -9,8 +9,7 @@ from pathlib import Path
|
|||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
from llama_stack.apis.models import ModelInput
|
||||
from llama_stack.distribution.datatypes import Provider
|
||||
|
||||
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
@ -26,6 +25,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "bedrock"
|
||||
memory_provider = Provider(
|
||||
|
@ -46,6 +51,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
)
|
||||
for m in MODEL_ALIASES
|
||||
]
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -61,6 +80,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=default_models,
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: bedrock
|
||||
distribution_spec:
|
||||
description: Use AWS Bedrock for running LLM inference and safety
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::bedrock
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: bedrock
|
||||
docker_image: null
|
||||
conda_env: bedrock
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: bedrock
|
||||
|
@ -65,8 +65,24 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
|
||||
models:
|
||||
|
@ -90,3 +106,10 @@ memory_banks: []
|
|||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: cerebras
|
||||
distribution_spec:
|
||||
description: Use Cerebras for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::cerebras
|
||||
|
@ -14,4 +13,9 @@ distribution_spec:
|
|||
- inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -9,8 +9,12 @@ from pathlib import Path
|
|||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -26,6 +30,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": ["inline::meta-reference"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
|
||||
inference_provider = Provider(
|
||||
|
@ -58,6 +68,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name="cerebras",
|
||||
|
@ -74,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: cerebras
|
||||
docker_image: null
|
||||
conda_env: cerebras
|
||||
apis:
|
||||
- agents
|
||||
|
@ -8,6 +7,7 @@ apis:
|
|||
- memory
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: cerebras
|
||||
|
@ -45,8 +45,24 @@ providers:
|
|||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
|
||||
models:
|
||||
|
@ -64,14 +80,17 @@ models:
|
|||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: fireworks
|
||||
distribution_spec:
|
||||
description: Use Fireworks.AI for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::fireworks
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -9,8 +9,12 @@ from pathlib import Path
|
|||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
|
||||
name = "fireworks"
|
||||
|
@ -69,6 +79,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -86,6 +110,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: fireworks
|
||||
docker_image: null
|
||||
conda_env: fireworks
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: fireworks
|
||||
|
@ -70,8 +70,24 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
|
||||
models:
|
||||
|
@ -129,14 +145,17 @@ models:
|
|||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: hf-endpoint
|
||||
distribution_spec:
|
||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::hf::endpoint
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -5,7 +5,12 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "hf-endpoint"
|
||||
inference_provider = Provider(
|
||||
|
@ -58,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -74,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
|
@ -96,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-endpoint
|
||||
docker_image: null
|
||||
conda_env: hf-endpoint
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-endpoint
|
||||
|
@ -75,33 +75,50 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: hf-endpoint
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: hf-endpoint-safety
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-endpoint
|
||||
docker_image: null
|
||||
conda_env: hf-endpoint
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-endpoint
|
||||
|
@ -70,24 +70,45 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: hf-endpoint
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: hf-serverless
|
||||
distribution_spec:
|
||||
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::hf::serverless
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -5,7 +5,12 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
|
||||
name = "hf-serverless"
|
||||
|
@ -59,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -97,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-serverless
|
||||
docker_image: null
|
||||
conda_env: hf-serverless
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-serverless
|
||||
|
@ -75,33 +75,50 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: hf-serverless
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: hf-serverless-safety
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: hf-serverless
|
||||
docker_image: null
|
||||
conda_env: hf-serverless
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-serverless
|
||||
|
@ -70,24 +70,39 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: hf-serverless
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups: []
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
description: Use Meta Reference for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- inline::meta-reference
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -7,8 +7,12 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceInferenceConfig,
|
||||
)
|
||||
|
@ -29,6 +33,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "meta-reference-gpu"
|
||||
inference_provider = Provider(
|
||||
|
@ -66,6 +76,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="meta-reference-safety",
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -104,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-gpu
|
||||
docker_image: null
|
||||
conda_env: meta-reference-gpu
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -77,33 +77,50 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: meta-reference-safety
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-gpu
|
||||
docker_image: null
|
||||
conda_env: meta-reference-gpu
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -71,24 +71,39 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups: []
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: meta-reference-quantized-gpu
|
||||
distribution_spec:
|
||||
description: Use Meta Reference with fp8, int4 quantization for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- inline::meta-reference-quantized
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -7,8 +7,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceQuantizedInferenceConfig,
|
||||
)
|
||||
|
@ -29,7 +28,27 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
name = "meta-reference-quantized-gpu"
|
||||
inference_provider = Provider(
|
||||
provider_id="meta-reference-inference",
|
||||
|
@ -76,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-quantized-gpu
|
||||
docker_image: null
|
||||
conda_env: meta-reference-quantized-gpu
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -73,24 +73,45 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: ollama
|
||||
distribution_spec:
|
||||
description: Use (an external) Ollama server for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::ollama
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -7,8 +7,12 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "ollama"
|
||||
inference_provider = Provider(
|
||||
|
@ -61,6 +71,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -92,6 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: ollama
|
||||
docker_image: null
|
||||
conda_env: ollama
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: ollama
|
||||
|
@ -69,33 +69,50 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: ollama
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: ollama
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: ollama
|
||||
docker_image: null
|
||||
conda_env: ollama
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: ollama
|
||||
|
@ -69,24 +69,39 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: ollama
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups: []
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: remote-vllm
|
||||
distribution_spec:
|
||||
description: Use (an external) vLLM server for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::vllm
|
||||
|
@ -16,4 +15,9 @@ distribution_spec:
|
|||
- inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: remote-vllm
|
||||
docker_image: null
|
||||
conda_env: remote-vllm
|
||||
apis:
|
||||
- agents
|
||||
|
@ -8,6 +7,7 @@ apis:
|
|||
- memory
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm-inference
|
||||
|
@ -52,33 +52,50 @@ providers:
|
|||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: vllm-safety
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: remote-vllm
|
||||
docker_image: null
|
||||
conda_env: remote-vllm
|
||||
apis:
|
||||
- agents
|
||||
|
@ -8,6 +7,7 @@ apis:
|
|||
- memory
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm-inference
|
||||
|
@ -46,24 +46,39 @@ providers:
|
|||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups: []
|
||||
|
|
|
@ -7,8 +7,12 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -24,6 +28,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "remote-vllm"
|
||||
inference_provider = Provider(
|
||||
|
@ -60,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -97,6 +121,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -20,6 +20,7 @@ from llama_stack.distribution.datatypes import (
|
|||
Provider,
|
||||
ShieldInput,
|
||||
StackRunConfig,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distribution.distribution import get_provider_registry
|
||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||
|
@ -30,6 +31,7 @@ class RunConfigSettings(BaseModel):
|
|||
provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
|
||||
default_models: Optional[List[ModelInput]] = None
|
||||
default_shields: Optional[List[ShieldInput]] = None
|
||||
default_tool_groups: Optional[List[ToolGroupInput]] = None
|
||||
|
||||
def run_config(
|
||||
self,
|
||||
|
@ -91,6 +93,7 @@ class RunConfigSettings(BaseModel):
|
|||
),
|
||||
models=self.default_models or [],
|
||||
shields=self.default_shields or [],
|
||||
tool_groups=self.default_tool_groups or [],
|
||||
)
|
||||
|
||||
|
||||
|
@ -159,14 +162,22 @@ class DistributionTemplate(BaseModel):
|
|||
|
||||
build_config = self.build_config()
|
||||
with open(yaml_output_dir / "build.yaml", "w") as f:
|
||||
yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)
|
||||
yaml.safe_dump(
|
||||
build_config.model_dump(exclude_none=True),
|
||||
f,
|
||||
sort_keys=False,
|
||||
)
|
||||
|
||||
for yaml_pth, settings in self.run_configs.items():
|
||||
run_config = settings.run_config(
|
||||
self.name, self.providers, self.docker_image
|
||||
)
|
||||
with open(yaml_output_dir / yaml_pth, "w") as f:
|
||||
yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
|
||||
yaml.safe_dump(
|
||||
run_config.model_dump(exclude_none=True),
|
||||
f,
|
||||
sort_keys=False,
|
||||
)
|
||||
|
||||
if self.template_path:
|
||||
docs = self.generate_markdown_docs()
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: tgi
|
||||
distribution_spec:
|
||||
description: Use (an external) TGI server for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::tgi
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: tgi
|
||||
docker_image: null
|
||||
conda_env: tgi
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: tgi-inference
|
||||
|
@ -70,27 +70,45 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: tgi-safety
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: tgi
|
||||
docker_image: null
|
||||
conda_env: tgi
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: tgi-inference
|
||||
|
@ -69,24 +69,39 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi-inference
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups: []
|
||||
|
|
|
@ -7,8 +7,12 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "tgi"
|
||||
inference_provider = Provider(
|
||||
|
@ -63,6 +73,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="tgi-safety",
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -99,6 +123,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
safety_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: together
|
||||
distribution_spec:
|
||||
description: Use Together.AI for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- remote::together
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: together
|
||||
docker_image: null
|
||||
conda_env: together
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: together
|
||||
|
@ -70,8 +70,24 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
|
||||
models:
|
||||
|
@ -124,14 +140,17 @@ models:
|
|||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -9,8 +9,12 @@ from pathlib import Path
|
|||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
name = "together"
|
||||
inference_provider = Provider(
|
||||
|
@ -59,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
)
|
||||
for m in MODEL_ALIASES
|
||||
]
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id="sentence-transformers",
|
||||
|
@ -83,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
),
|
||||
},
|
||||
|
|
|
@ -2,7 +2,6 @@ version: '2'
|
|||
name: vllm-gpu
|
||||
distribution_spec:
|
||||
description: Use a built-in vLLM engine for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference:
|
||||
- inline::vllm
|
||||
|
@ -25,4 +24,9 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::code-interpreter
|
||||
- inline::memory-runtime
|
||||
image_type: conda
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
version: '2'
|
||||
image_name: vllm-gpu
|
||||
docker_image: null
|
||||
conda_env: vllm-gpu
|
||||
apis:
|
||||
- agents
|
||||
|
@ -11,6 +10,7 @@ apis:
|
|||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm
|
||||
|
@ -73,24 +73,45 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: memory-runtime
|
||||
provider_type: inline::memory-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm
|
||||
provider_model_id: null
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
provider_model_id: null
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::memory
|
||||
provider_id: memory-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
|
|
|
@ -11,7 +11,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|||
)
|
||||
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
ToolGroupInput,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -24,7 +28,14 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::code-interpreter",
|
||||
"inline::memory-runtime",
|
||||
],
|
||||
}
|
||||
|
||||
name = "vllm-gpu"
|
||||
inference_provider = Provider(
|
||||
provider_id="vllm",
|
||||
|
@ -54,6 +65,20 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code-interpreter",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -70,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue