llama-stack-mirror/llama_stack/templates/llama_api/run.yaml
ehhuang 5844c2da68
feat: add list responses API (#2233)
# What does this PR do?
This is not part of the official OpenAI API, but we'll use this for the
logs UI.
In order to support more filtering options, I'm adopting the newly
introduced SQL store in place of the KV store.

## Test Plan
Added integration/unit tests.
2025-05-23 13:16:48 -07:00
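
For orientation, a rough usage sketch (not part of this PR): with the stack running on this config, responses created through the OpenAI-compatible Responses API are persisted in `responses_store` and can be listed back. The `/v1/openai/v1` prefix and the `GET /responses` list route below are assumptions for illustration; check the server's registered routes for the exact paths.

```python
# Hedged sketch: the base-URL prefix and the list route are assumptions for
# illustration, not verified against this exact revision.
import requests
from openai import OpenAI

BASE = "http://localhost:8321/v1/openai/v1"  # assumed OpenAI-compat prefix of the stack server

# Create a response through the OpenAI-compatible Responses API.
client = OpenAI(base_url=BASE, api_key="unused")
client.responses.create(
    model="Llama-3.3-70B-Instruct",
    input="Say hello in one short sentence.",
)

# List stored responses (backed by the responses_store configured below); the
# list endpoint is the addition this commit describes and is not part of the
# official OpenAI API.
listing = requests.get(f"{BASE}/responses").json()
for item in listing.get("data", []):
    print(item.get("id"), item.get("model"))
```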

version: '2'
image_name: llama_api
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
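# Values of the form ${env.VAR:default} are resolved from the environment at
# startup; the text after ':' is the fallback used when VAR is unset.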
providers:
  inference:
  - provider_id: llama-openai-compat
    provider_type: remote::llama-openai-compat
    config:
      openai_compat_api_base: https://api.llama.com/compat/v1/
      api_key: ${env.LLAMA_API_KEY:}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: sqlite-vec
    provider_type: inline::sqlite-vec
    config:
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db
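  # ${env.VAR+value} resolves to "value" only when VAR is set, so the remote
  # vector stores below are opt-in via ENABLE_CHROMADB / ENABLE_PGVECTOR.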
  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL:}
  - provider_id: ${env.ENABLE_PGVECTOR+pgvector}
    provider_type: remote::pgvector
    config:
      host: ${env.PGVECTOR_HOST:localhost}
      port: ${env.PGVECTOR_PORT:5432}
      db: ${env.PGVECTOR_DB:}
      user: ${env.PGVECTOR_USER:}
      password: ${env.PGVECTOR_PASSWORD:}
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db
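      # Responses are persisted in a SQL store (rather than the KV store) so
      # they can be listed and filtered, e.g. for the logs UI.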
      responses_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db
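  # TELEMETRY_SINKS takes a comma-separated list; by default traces go to the
  # console and to the sqlite trace database configured below.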
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
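# Server-level stores: metadata_store holds the resource registry (models,
# shields, tool groups, ...); inference_store persists inference records so
# they can be listed back.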
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
inference_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db
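# Each Llama API model is registered twice: once under its provider-native id
# and once under a meta-llama/ alias, both mapping to the same provider_model_id.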
models:
- metadata: {}
  model_id: Llama-3.3-70B-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-3.3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-3.3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
  provider_id: llama-openai-compat
  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
server:
  port: 8321