# llama-stack-mirror/redhat-distribution/run.yaml
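# Llama Stack run configuration for the Red Hat ("rh") distribution image.
# Values written as ${env.VAR:=default} are resolved from the environment at
# startup, falling back to the value after ":=" (empty where no default is
# given).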
version: 2
image_name: rh
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
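  # Bedrock inference (in-tree remote provider). All AWS settings below are
  # optional environment variables; empty defaults are assumed to fall back to
  # the standard AWS credential/region resolution chain (botocore defaults).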
  - provider_id: bedrock-inference
    provider_type: remote::bedrock
    config:
      aws_access_key_id: ${env.AWS_ACCESS_KEY_ID:=}
      aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
      aws_session_token: ${env.AWS_SESSION_TOKEN:=}
      region_name: ${env.AWS_DEFAULT_REGION:=}
      profile_name: ${env.AWS_PROFILE:=}
      total_max_attempts: ${env.AWS_MAX_ATTEMPTS:=}
      retry_mode: ${env.AWS_RETRY_MODE:=}
      connect_timeout: ${env.AWS_CONNECT_TIMEOUT:=}
      read_timeout: ${env.AWS_READ_TIMEOUT:=}
      session_ttl: ${env.AWS_SESSION_TTL:=}
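  # Local embedding provider backing the granite embedding model registered
  # under models: below.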
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
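  # Inline Milvus vector store using a local database file; its registry
  # kvstore is SQLite.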
  vector_io:
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      db_path: /opt/app-root/src/.llama/distributions/rh/milvus.db
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db
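  # Safety via the remote TrustyAI FMS orchestrator; both settings default to
  # empty and are expected to be supplied through the environment.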
  safety:
  - provider_id: trustyai_fms
    provider_type: remote::trustyai_fms
    config:
      orchestrator_url: ${env.FMS_ORCHESTRATOR_URL:=}
      ssl_cert_path: ${env.FMS_SSL_CERT_PATH:=}
      shields: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/agents_store.db
      responses_store:
        type: sqlite
        db_path: /opt/app-root/src/.llama/distributions/rh/responses_store.db
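  # Evaluation through TrustyAI LMEval; use_k8s: True assumes eval jobs run on
  # a Kubernetes cluster, and base_url reuses the vLLM endpoint above.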
  eval:
  - provider_id: trustyai_lmeval
    provider_type: remote::trustyai_lmeval
    config:
      use_k8s: True
      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: /opt/app-root/src/.llama/distributions/rh/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
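  # Telemetry defaults to console + local SQLite traces; the OTLP endpoint is
  # optional. The \u200B default for OTEL_SERVICE_NAME is a zero-width space,
  # presumably a placeholder so the service name is never an empty string.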
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
      sqlite_db_path: /opt/app-root/src/.llama/distributions/rh/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
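  # Search tool providers read their API keys from the environment; the
  # builtin::websearch tool group below is mapped to tavily-search.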
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/registry.db
inference_store:
  type: sqlite
  db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
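# INFERENCE_MODEL has no default, so it must be provided in the environment for
# the vLLM model registration below; the granite embedding model is served
# locally by sentence-transformers.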
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
- metadata:
    embedding_dimension: 768
  model_id: granite-embedding-125m
  provider_id: sentence-transformers
  provider_model_id: ibm-granite/granite-embedding-125m-english
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
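# The stack listens on port 8321; external_providers_dir points at provider
# definitions loaded from outside the main package (presumably the
# remote::trustyai_* providers used above).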
server:
  port: 8321
external_providers_dir: /opt/app-root/src/.llama/providers.d