# llama-stack-mirror/redhat-distribution/run.yaml
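# Llama Stack run configuration for the Red Hat ("rh") distribution image.
# Values written as ${env.VAR:=default} are resolved from the environment at
# startup, falling back to the value after ":=" (empty where no default is
# given).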
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
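  # Bedrock inference (in-tree remote provider). All AWS settings below are
  # optional environment variables; empty defaults are assumed to fall back to
  # the standard AWS credential/region resolution chain (botocore defaults).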
  - provider_id: bedrock-inference
    provider_type: remote::bedrock
    config:
      aws_access_key_id: ${env.AWS_ACCESS_KEY_ID:=}
      aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
      aws_session_token: ${env.AWS_SESSION_TOKEN:=}
      region_name: ${env.AWS_DEFAULT_REGION:=}
      profile_name: ${env.AWS_PROFILE:=}
      total_max_attempts: ${env.AWS_MAX_ATTEMPTS:=}
      retry_mode: ${env.AWS_RETRY_MODE:=}
      connect_timeout: ${env.AWS_CONNECT_TIMEOUT:=}
      read_timeout: ${env.AWS_READ_TIMEOUT:=}
      session_ttl: ${env.AWS_SESSION_TTL:=}
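  # Local embedding provider backing the granite embedding model registered
  # under models: below.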
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
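  # Inline Milvus vector store using a local database file; its registry
  # kvstore is SQLite.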
  vector_io:
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
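  # Safety via the remote TrustyAI FMS orchestrator; both settings default to
  # empty and are expected to be supplied through the environment.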
  safety:
  - provider_id: trustyai_fms
    provider_type: remote::trustyai_fms
    config:
      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
      shields: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
      responses_store:
        type: sqlite
        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
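  # Evaluation through TrustyAI LMEval; use_k8s: True assumes eval jobs run on
  # a Kubernetes cluster, and base_url reuses the vLLM endpoint above.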
  eval:
  - provider_id: trustyai_lmeval
    provider_type: remote::trustyai_lmeval
    config:
      use_k8s: True
      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
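  # Telemetry defaults to console + local SQLite traces; the OTLP endpoint is
  # optional. The \u200B default for OTEL_SERVICE_NAME is a zero-width space,
  # presumably a placeholder so the service name is never an empty string.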
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
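  # Search tool providers read their API keys from the environment; the
  # builtin::websearch tool group below is mapped to tavily-search.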
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
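# INFERENCE_MODEL has no default, so it must be provided in the environment for
# the vLLM model registration below; the granite embedding model is served
# locally by sentence-transformers.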
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
- metadata:
    embedding_dimension: 768
  model_id: granite-embedding-125m
  provider_id: sentence-transformers
  provider_model_id: ibm-granite/granite-embedding-125m-english
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
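# The stack listens on port 8321; external_providers_dir points at provider
# definitions loaded from outside the main package (presumably the
# remote::trustyai_* providers used above).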
server:
  port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d