Merge 059d880bc0 into sapling-pr-archive-ehhuang

ehhuang 2025-10-20 14:53:48 -07:00 committed by GitHub
commit 6cb148dbe6
199 changed files with 27713 additions and 7978 deletions


@@ -82,11 +82,14 @@ runs:
         echo "No recording changes"
       fi
-  - name: Write inference logs to file
+  - name: Write docker logs to file
     if: ${{ always() }}
     shell: bash
     run: |
       sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+      distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
+      stack_container_name="llama-stack-test-$distro_name"
+      sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true
   - name: Upload logs
     if: ${{ always() }}
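For illustration, the new step's container-name derivation can be exercised on its own. This is a standalone sketch of the same sed pipeline; `docker:ci-tests` is an assumed example value for `inputs.stack-config`:

```bash
# Standalone sketch of the sed pipeline above; the input value is an assumed example.
stack_config="docker:ci-tests"
distro_name=$(echo "$stack_config" | sed 's/^docker://' | sed 's/^server://')
echo "llama-stack-test-$distro_name"   # -> llama-stack-test-ci-tests
```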


@@ -73,6 +73,24 @@ jobs:
           image_name: kube
           apis: []
           providers: {}
+          storage:
+            backends:
+              kv_default:
+                type: kv_sqlite
+                db_path: $run_dir/kvstore.db
+              sql_default:
+                type: sql_sqlite
+                db_path: $run_dir/sql_store.db
+            stores:
+              metadata:
+                namespace: registry
+                backend: kv_default
+              inference:
+                table_name: inference_store
+                backend: sql_default
+              conversations:
+                table_name: openai_conversations
+                backend: sql_default
           server:
             port: 8321
           EOF


@@ -169,9 +169,7 @@ jobs:
         run: |
           uv run --no-sync \
             pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-            tests/integration/vector_io \
-            --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-            --embedding-dimension 768
+            tests/integration/vector_io
       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}


@@ -98,21 +98,30 @@ data:
     - provider_id: model-context-protocol
       provider_type: remote::model-context-protocol
      config: {}
-    metadata_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
-      table_name: llamastack_kvstore
-    inference_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
     models:
     - metadata:
         embedding_dimension: 768

@@ -137,5 +146,4 @@ data:
       port: 8323
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config


@@ -95,21 +95,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768


@@ -0,0 +1,8 @@
These are the source-of-truth configuration files used to generate the client SDKs via Stainless.

- `openapi.yml`: the OpenAPI specification for the Llama Stack API.
- `openapi.stainless.yml`: the Stainless _configuration_, which instructs Stainless how to generate the client SDKs.

A small side note: note the `.yml` suffix, which Stainless typically uses for its configuration files.

These files go hand-in-hand. As of now, only `openapi.yml` is generated automatically, via the `run_openapi_generator.sh` script.
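A minimal regeneration sketch, assuming the generator script lives at the path implied by this commit's changes (adjust to your checkout; the script now also refreshes the Stainless copy of the spec):

```bash
# Assumed script location; adjust to where run_openapi_generator.sh lives in your checkout.
./docs/openapi_generator/run_openapi_generator.sh
# The script's final step (added in this commit) is equivalent to:
#   cp docs/static/stainless-llama-stack-spec.yaml client-sdks/stainless/openapi.yml
```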


@ -0,0 +1,608 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provide a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}

File diff suppressed because it is too large.


@@ -60,6 +60,17 @@ ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}
 # Copy the repository so editable installs and run configurations are available.
 COPY . /workspace

+# Install the client package if it is provided
+# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
+RUN set -eux; \
+    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
+        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
+            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
+            exit 1; \
+        fi; \
+        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
+    fi;
+
 # Install llama-stack
 RUN set -eux; \
     if [ "$INSTALL_MODE" = "editable" ]; then \

@@ -83,16 +94,6 @@ RUN set -eux; \
     fi; \
     fi;

-# Install the client package if it is provided
-RUN set -eux; \
-    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
-        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
-            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
-    fi;
-
 # Install the dependencies for the distribution
 RUN set -eux; \
     if [ -z "$DISTRO_NAME" ]; then \
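As a usage sketch, a local build that exercises the new ordering might look like this. The image tag and client-checkout path are assumptions, and the build args are assumed to be declared as `ARG`s earlier in the Containerfile:

```bash
# Hypothetical local build: editable llama-stack plus a local client checkout.
# LLAMA_STACK_CLIENT_DIR must point inside the build context, since it is
# checked and installed from /workspace after `COPY . /workspace`.
docker build \
  --build-arg INSTALL_MODE=editable \
  --build-arg LLAMA_STACK_CLIENT_DIR=/workspace/llama-stack-client-python \
  -t llama-stack:dev .
```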


@@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:
 To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:

 ```yaml
-models:
-  - model_id: nomic-ai/nomic-embed-text-v1.5
-    provider_id: inline::sentence-transformers
-    metadata:
-      embedding_dimension: 768
-      default_configured: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
 ```

 With this configuration:
-- `client.vector_stores.create()` works without requiring embedding model parameters
-- The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- `client.vector_stores.create()` works without requiring embedding model or provider parameters
+- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available
+- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store
+- The `default_provider_id` specifies which vector storage backend to use
+- The `default_embedding_model` specifies both the inference provider and model for embeddings

 ## Vector Store Operations

@@ -108,14 +109,15 @@ With this configuration:
 You can create vector stores with automatic or explicit embedding model selection:

 ```python
-# Automatic - uses default configured embedding model
+# Automatic - uses default configured embedding model and vector store provider
 vs = client.vector_stores.create()

-# Explicit - specify embedding model when you need a specific one
+# Explicit - specify embedding model and/or provider when you need specific ones
 vs = client.vector_stores.create(
     extra_body={
-        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
-        "embedding_dimension": 768
+        "provider_id": "faiss",  # Optional: specify vector store provider
+        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+        "embedding_dimension": 768  # Optional: will be auto-detected if not provided
     }
 )
 ```
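For reference, the same default-driven create can be exercised over plain HTTP. A minimal sketch, assuming a server on the default port 8321 and that an empty body is accepted when defaults are configured (the route comes from this PR's Stainless config):

```bash
# Create a vector store with no explicit embedding model or provider;
# the server falls back to default_provider_id / default_embedding_model.
curl -s -X POST http://localhost:8321/v1/vector_stores \
  -H 'Content-Type: application/json' \
  -d '{}'
```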


@@ -44,18 +44,32 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
+      persistence:
+        agent_state:
+          backend: kv_default
+          namespace: agents
+        responses:
+          backend: sql_default
+          table_name: responses
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}


@@ -1,56 +1,155 @@
 apiVersion: v1
 data:
-  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
-    inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
-    \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
-    \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
-    ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
-    ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
-    remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
-    \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
-    \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
-    \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
-    \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
-    \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
-    \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
-    \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
-    \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
-    meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
-    ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
-    \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
-    \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
-    \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
-    \ provider_type: inline::meta-reference\n config:\n persistence_store:\n
-    \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
-    ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
-    \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
-    ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
-    \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
-    \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
-    \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
-    3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
-    {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
-    \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host:
-    ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
-    metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id:
-    sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n
-    \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id:
-    ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n
-    \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
-    []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
-    builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
-    \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n
-    \ type: github_token\n"
+  stack_run_config.yaml: |
+    version: '2'
+    image_name: kubernetes-demo
+    apis:
+    - agents
+    - inference
+    - files
+    - safety
+    - telemetry
+    - tool_runtime
+    - vector_io
+    providers:
+      inference:
+      - provider_id: vllm-inference
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: vllm-safety
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: sentence-transformers
+        provider_type: inline::sentence-transformers
+        config: {}
+      vector_io:
+      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+        provider_type: remote::chromadb
+        config:
+          url: ${env.CHROMADB_URL:=}
+          kvstore:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+      safety:
+      - provider_id: llama-guard
+        provider_type: inline::llama-guard
+        config:
+          excluded_categories: []
+      agents:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          persistence_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+          responses_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
+      tool_runtime:
+      - provider_id: brave-search
+        provider_type: remote::brave-search
+        config:
+          api_key: ${env.BRAVE_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: tavily-search
+        provider_type: remote::tavily-search
+        config:
+          api_key: ${env.TAVILY_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: rag-runtime
+        provider_type: inline::rag-runtime
+        config: {}
+      - provider_id: model-context-protocol
+        provider_type: remote::model-context-protocol
+        config: {}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
+    models:
+    - metadata:
+        embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+    - metadata: {}
+      model_id: ${env.INFERENCE_MODEL}
+      provider_id: vllm-inference
+      model_type: llm
+    - metadata: {}
+      model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+      provider_id: vllm-safety
+      model_type: llm
+    shields:
+    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    vector_dbs: []
+    datasets: []
+    scoring_fns: []
+    benchmarks: []
+    tool_groups:
+    - toolgroup_id: builtin::websearch
+      provider_id: tavily-search
+    - toolgroup_id: builtin::rag
+      provider_id: rag-runtime
+    server:
+      port: 8321
+      auth:
+        provider_config:
+          type: github_token
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config


@@ -93,21 +93,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768


@@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
-| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No | | |

 ## Sample Configuration

 ```yaml
-persistence_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
-responses_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
+persistence:
+  agent_state:
+    namespace: agents
+    backend: kv_default
+  responses:
+    table_name: responses
+    backend: sql_default
+    max_write_queue_size: 10000
+    num_writers: 4
 ```


@@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Configuration for the key-value store backend. |
 | `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
 | `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |

@@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence.
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db
+  namespace: batches
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
+  namespace: datasetio::localfs
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
+  namespace: datasetio::huggingface
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
+  namespace: eval
+  backend: kv_default
 ```


@@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
 | `ttl_secs` | `<class 'int'>` | No | 31536000 | |

@@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo
 ```yaml
 storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
+  table_name: files_metadata
+  backend: sql_default
 ```


@@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad
 | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
 | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
 | `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |

 ## Sample Configuration

@@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
 endpoint_url: ${env.S3_ENDPOINT_URL:=}
 auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db
+  table_name: s3_files_metadata
+  backend: sql_default
 ```


@@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |

 ## Sample Configuration

 ```yaml
 db_path: ${env.CHROMADB_PATH}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db
+persistence:
+  namespace: vector_io::chroma
+  backend: kv_default
 ```


@@ -95,12 +95,12 @@ more details about Faiss in general.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```


@@ -14,14 +14,14 @@ Meta's reference implementation of a vector database.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```

 ## Deprecation Notice


@@ -17,14 +17,14 @@ Please refer to the remote provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
 | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |

 ## Sample Configuration

 ```yaml
 db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
+persistence:
+  namespace: vector_io::milvus
+  backend: kv_default
 ```


@@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant
+  backend: kv_default
 ```


@@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```


@@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```

 ## Deprecation Notice


@@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |

 ## Sample Configuration

 ```yaml
 url: ${env.CHROMADB_URL}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db
+persistence:
+  namespace: vector_io::chroma_remote
+  backend: kv_default
 ```


@@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
 | `uri` | `<class 'str'>` | No | | The URI of the Milvus server |
 | `token` | `str \| None` | No | | The token of the Milvus server |
 | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
 | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |

 :::note

@@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo
 ```yaml
 uri: ${env.MILVUS_ENDPOINT}
 token: ${env.MILVUS_TOKEN}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db
+persistence:
+  namespace: vector_io::milvus_remote
+  backend: kv_default
 ```


@@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 | `db` | `str \| None` | No | postgres | |
 | `user` | `str \| None` | No | postgres | |
 | `password` | `str \| None` | No | mysecretpassword | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

@@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432}
 db: ${env.PGVECTOR_DB}
 user: ${env.PGVECTOR_USER}
 password: ${env.PGVECTOR_PASSWORD}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db
+persistence:
+  namespace: vector_io::pgvector
+  backend: kv_default
 ```


@@ -26,13 +26,13 @@ Please refer to the inline provider documentation.
 | `prefix` | `str \| None` | No | | |
 | `timeout` | `int \| None` | No | | |
 | `host` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 api_key: ${env.QDRANT_API_KEY:=}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant_remote
+  backend: kv_default
 ```


@@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
 |-------|------|----------|---------|-------------|
 | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
 | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 weaviate_api_key: null
 weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db
+persistence:
+  namespace: vector_io::weaviate
+  backend: kv_default
 ```

@ -30,3 +30,5 @@ fi
stack_dir=$(dirname $(dirname $THIS_DIR)) stack_dir=$(dirname $(dirname $THIS_DIR))
PYTHONPATH=$PYTHONPATH:$stack_dir \ PYTHONPATH=$PYTHONPATH:$stack_dir \
python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static
cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml

@ -9024,6 +9024,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9901,6 +9905,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -6734,6 +6734,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7403,6 +7407,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -7600,6 +7600,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -8148,6 +8152,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -5815,6 +5815,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -6218,6 +6222,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -9272,6 +9272,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9820,6 +9824,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -7028,6 +7028,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7431,6 +7435,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
:param tools: (Optional) An array of tools the model may call while generating a response. :param tools: (Optional) An array of tools the model may call while generating a response.
:param truncation: (Optional) Truncation strategy applied to the response :param truncation: (Optional) Truncation strategy applied to the response
:param usage: (Optional) Token usage information for the response :param usage: (Optional) Token usage information for the response
:param instructions: (Optional) System message inserted into the model's context
""" """
created_at: int created_at: int
@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
tools: list[OpenAIResponseTool] | None = None tools: list[OpenAIResponseTool] | None = None
truncation: str | None = None truncation: str | None = None
usage: OpenAIResponseUsage | None = None usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@json_schema_type @json_schema_type

@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
models = "models" models = "models"
shields = "shields" shields = "shields"
vector_dbs = "vector_dbs" # only used for routing
datasets = "datasets" datasets = "datasets"
scoring_functions = "scoring_functions" scoring_functions = "scoring_functions"
benchmarks = "benchmarks" benchmarks = "benchmarks"

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from typing import Literal from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
@ -59,3 +59,35 @@ class ListVectorDBsResponse(BaseModel):
""" """
data: list[VectorDB] data: list[VectorDB]
@runtime_checkable
class VectorDBs(Protocol):
"""Internal protocol for vector_dbs routing - no public API endpoints."""
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""Internal method to list vector databases."""
...
async def get_vector_db(
self,
vector_db_id: str,
) -> VectorDB:
"""Internal method to get a vector database by ID."""
...
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> VectorDB:
"""Internal method to register a vector database."""
...
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Internal method to unregister a vector database."""
...
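Since the protocol is `@runtime_checkable`, implementations satisfy it structurally; no inheritance is required. A minimal self-contained sketch of that mechanism (the `InMemoryVectorDBs` class is hypothetical, and `isinstance` against a runtime-checkable protocol checks only method presence, not signatures):

```python
from typing import Protocol, runtime_checkable


@runtime_checkable
class HasVectorDBRouting(Protocol):
    async def list_vector_dbs(self): ...
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...


class InMemoryVectorDBs:  # hypothetical; note: no inheritance from the protocol
    async def list_vector_dbs(self):
        return []

    async def unregister_vector_db(self, vector_db_id: str) -> None:
        pass


# Structural check succeeds because the expected methods exist.
assert isinstance(InMemoryVectorDBs(), HasVectorDBRouting)
```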

@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars from llama_stack.core.stack import replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@ -286,21 +294,42 @@ def _generate_run_config(
Generate a run.yaml template file for user to edit from a build.yaml file Generate a run.yaml template file for user to edit from a build.yaml file
""" """
apis = list(build_config.distribution_spec.providers.keys()) apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
),
)
run_config = StackRunConfig( run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name, image_name=image_name,
apis=apis, apis=apis,
providers={}, providers={},
storage=storage,
external_providers_dir=build_config.external_providers_dir external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR, else EXTERNAL_PROVIDERS_DIR,
) )
if not run_config.inference_store:
run_config.inference_store = SqliteSqlStoreConfig(
**SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
)
)
# build providers dict # build providers dict
provider_registry = get_provider_registry(build_config) provider_registry = get_provider_registry(build_config)
for api in apis: for api in apis:

@ -17,10 +17,19 @@ from llama_stack.core.datatypes import (
BuildConfig, BuildConfig,
Provider, Provider,
StackRunConfig, StackRunConfig,
StorageConfig,
) )
from llama_stack.core.distribution import get_provider_registry from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
@ -51,11 +60,23 @@ def generate_run_config(
Generate a run.yaml template file for user to edit from a build.yaml file Generate a run.yaml template file for user to edit from a build.yaml file
""" """
apis = list(build_config.distribution_spec.providers.keys()) apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
run_config = StackRunConfig( run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name, image_name=image_name,
apis=apis, apis=apis,
providers={}, providers={},
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
external_providers_dir=build_config.external_providers_dir external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR, else EXTERNAL_PROVIDERS_DIR,

@ -159,6 +159,37 @@ def upgrade_from_routing_table(
config_dict["apis"] = config_dict["apis_to_serve"] config_dict["apis"] = config_dict["apis_to_serve"]
config_dict.pop("apis_to_serve", None) config_dict.pop("apis_to_serve", None)
# Add default storage config if not present
if "storage" not in config_dict:
config_dict["storage"] = {
"backends": {
"kv_default": {
"type": "kv_sqlite",
"db_path": "~/.llama/kvstore.db",
},
"sql_default": {
"type": "sql_sqlite",
"db_path": "~/.llama/sql_store.db",
},
},
"stores": {
"metadata": {
"namespace": "registry",
"backend": "kv_default",
},
"inference": {
"table_name": "inference_store",
"backend": "sql_default",
"max_write_queue_size": 10000,
"num_writers": 4,
},
"conversations": {
"table_name": "openai_conversations",
"backend": "sql_default",
},
},
}
return config_dict return config_dict

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import os
import secrets import secrets
import time import time
from typing import Any from typing import Any
@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
Conversations, Conversations,
Metadata, Metadata,
) )
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import ( from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
SqliteSqlStoreConfig,
SqlStoreConfig,
sqlstore_impl,
)
logger = get_logger(name=__name__, category="openai_conversations") logger = get_logger(name=__name__, category="openai_conversations")
@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel): class ConversationServiceConfig(BaseModel):
"""Configuration for the built-in conversation service. """Configuration for the built-in conversation service.
:param conversations_store: SQL store configuration for conversations (defaults to SQLite) :param run_config: Stack run configuration for resolving persistence
:param policy: Access control rules :param policy: Access control rules
""" """
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig( run_config: StackRunConfig
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
)
policy: list[AccessRule] = [] policy: list[AccessRule] = []
@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
self.deps = deps self.deps = deps
self.policy = config.policy self.policy = config.policy
base_sql_store = sqlstore_impl(config.conversations_store) # Use conversations store reference from run config
conversations_ref = config.run_config.storage.stores.conversations
if not conversations_ref:
raise ValueError("storage.stores.conversations must be configured in run config")
base_sql_store = sqlstore_impl(conversations_ref)
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
async def initialize(self) -> None: async def initialize(self) -> None:
"""Initialize the store and create tables.""" """Initialize the store and create tables."""
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
await self.sql_store.create_table( await self.sql_store.create_table(
"openai_conversations", "openai_conversations",
{ {

@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
StorageBackendType,
StorageConfig,
)
from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@ -351,12 +354,32 @@ class AuthenticationRequiredError(Exception):
pass pass
class QualifiedModel(BaseModel):
"""A qualified model identifier, consisting of a provider ID and a model ID."""
provider_id: str
model_id: str
class VectorStoresConfig(BaseModel):
"""Configuration for vector stores in the stack."""
default_provider_id: str | None = Field(
default=None,
description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
)
default_embedding_model: QualifiedModel | None = Field(
default=None,
description="Default embedding model configuration for vector stores.",
)
class QuotaPeriod(StrEnum): class QuotaPeriod(StrEnum):
DAY = "day" DAY = "day"
class QuotaConfig(BaseModel): class QuotaConfig(BaseModel):
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
authenticated_max_requests: int = Field( authenticated_max_requests: int = Field(
default=1000, description="Max requests for authenticated clients per period" default=1000, description="Max requests for authenticated clients per period"
@ -438,18 +461,6 @@ class ServerConfig(BaseModel):
) )
class InferenceStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class ResponsesStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class StackRunConfig(BaseModel): class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@ -476,26 +487,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
can be instantiated multiple times (with different configs) if necessary. can be instantiated multiple times (with different configs) if necessary.
""", """,
) )
metadata_store: KVStoreConfig | None = Field( storage: StorageConfig = Field(
default=None, description="Catalog of named storage backends and references available to the stack",
description="""
Configuration for the persistence store used by the distribution registry. If not specified,
a default SQLite store will be used.""",
)
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the inference API. Can be either a
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
If not specified, a default SQLite store will be used.""",
)
conversations_store: SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the conversations API.
If not specified, a default SQLite store will be used.""",
) )
# registry of "resources" in the distribution # registry of "resources" in the distribution
@ -526,6 +519,11 @@ If not specified, a default SQLite store will be used.""",
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
) )
vector_stores: VectorStoresConfig | None = Field(
default=None,
description="Configuration for vector stores, including default embedding model",
)
@field_validator("external_providers_dir") @field_validator("external_providers_dir")
@classmethod @classmethod
def validate_external_providers_dir(cls, v): def validate_external_providers_dir(cls, v):
@ -535,6 +533,49 @@ If not specified, a default SQLite store will be used.""",
return Path(v) return Path(v)
return v return v
@model_validator(mode="after")
def validate_server_stores(self) -> "StackRunConfig":
backend_map = self.storage.backends
stores = self.storage.stores
kv_backends = {
name
for name, cfg in backend_map.items()
if cfg.type
in {
StorageBackendType.KV_REDIS,
StorageBackendType.KV_SQLITE,
StorageBackendType.KV_POSTGRES,
StorageBackendType.KV_MONGODB,
}
}
sql_backends = {
name
for name, cfg in backend_map.items()
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
}
def _ensure_backend(reference, expected_set, store_name: str) -> None:
if reference is None:
return
backend_name = reference.backend
if backend_name not in backend_map:
raise ValueError(
f"{store_name} references unknown backend '{backend_name}'. "
f"Available backends: {sorted(backend_map)}"
)
if backend_name not in expected_set:
raise ValueError(
f"{store_name} references backend '{backend_name}' of type "
f"'{backend_map[backend_name].type.value}', but a backend of type "
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
)
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
return self
class BuildConfig(BaseModel): class BuildConfig(BaseModel):
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
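A quick way to see the `validate_server_stores` validator above in action: point `storage.stores.metadata` at a SQL backend and construction fails. A minimal sketch, assuming the import paths used elsewhere in this diff and that the other store references on `ServerStoresConfig` are optional:

```python
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteSqlStoreConfig,
    StorageConfig,
)

try:
    StackRunConfig(
        image_name="demo",
        apis=[],
        providers={},
        storage=StorageConfig(
            backends={"sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql_store.db")},
            stores=ServerStoresConfig(
                # metadata requires a kv_* backend, so this reference is rejected
                metadata=KVStoreReference(backend="sql_default", namespace="registry"),
            ),
        ),
    )
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print(err)
```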

@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
routing_table_api=Api.tool_groups, routing_table_api=Api.tool_groups,
router_api=Api.tool_runtime, router_api=Api.tool_runtime,
), ),
AutoRoutedApiInfo(
routing_table_api=Api.vector_dbs,
router_api=Api.vector_io,
),
] ]

@ -11,9 +11,8 @@ from pydantic import BaseModel
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
class PromptServiceConfig(BaseModel): class PromptServiceConfig(BaseModel):
@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
self.kvstore: KVStore self.kvstore: KVStore
async def initialize(self) -> None: async def initialize(self) -> None:
kvstore_config = SqliteKVStoreConfig( # Use metadata store backend with prompts-specific namespace
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() metadata_ref = self.config.run_config.storage.stores.metadata
) if not metadata_ref:
self.kvstore = await kvstore_impl(kvstore_config) raise ValueError("storage.stores.metadata must be configured in run config")
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
self.kvstore = await kvstore_impl(prompts_ref)
def _get_default_key(self, prompt_id: str) -> str: def _get_default_key(self, prompt_id: str) -> str:
"""Get the KVStore key that stores the default version number.""" """Get the KVStore key that stores the default version number."""

@ -29,6 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl from llama_stack.core.client import get_client_impl
@ -81,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.inspect: Inspect, Api.inspect: Inspect,
Api.batches: Batches, Api.batches: Batches,
Api.vector_io: VectorIO, Api.vector_io: VectorIO,
Api.vector_dbs: VectorDBs,
Api.models: Models, Api.models: Models,
Api.safety: Safety, Api.safety: Safety,
Api.shields: Shields, Api.shields: Shields,

@ -6,7 +6,10 @@
from typing import Any from typing import Any
from llama_stack.core.datatypes import AccessRule, RoutedProtocol from llama_stack.core.datatypes import (
AccessRule,
RoutedProtocol,
)
from llama_stack.core.stack import StackRunConfig from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.datatypes import Api, RoutingTable
@ -26,6 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_dbs import VectorDBsRoutingTable
api_to_tables = { api_to_tables = {
"models": ModelsRoutingTable, "models": ModelsRoutingTable,
@ -34,6 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable, "scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable, "benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable, "tool_groups": ToolGroupsRoutingTable,
"vector_dbs": VectorDBsRoutingTable,
} }
if api.value not in api_to_tables: if api.value not in api_to_tables:
@ -76,14 +81,21 @@ async def get_auto_router_impl(
api_to_dep_impl[dep_name] = deps[dep_api] api_to_dep_impl[dep_name] = deps[dep_api]
# TODO: move pass configs to routers instead # TODO: move pass configs to routers instead
if api == Api.inference and run_config.inference_store: if api == Api.inference:
inference_ref = run_config.storage.stores.inference
if not inference_ref:
raise ValueError("storage.stores.inference must be configured in run config")
inference_store = InferenceStore( inference_store = InferenceStore(
config=run_config.inference_store, reference=inference_ref,
policy=policy, policy=policy,
) )
await inference_store.initialize() await inference_store.initialize()
api_to_dep_impl["store"] = inference_store api_to_dep_impl["store"] = inference_store
elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
await impl.initialize() await impl.initialize()
return impl return impl

@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject, VectorStoreObject,
VectorStoreSearchResponsePage, VectorStoreSearchResponsePage,
) )
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -43,9 +44,11 @@ class VectorIORouter(VectorIO):
def __init__( def __init__(
self, self,
routing_table: RoutingTable, routing_table: RoutingTable,
vector_stores_config: VectorStoresConfig | None = None,
) -> None: ) -> None:
logger.debug("Initializing VectorIORouter") logger.debug("Initializing VectorIORouter")
self.routing_table = routing_table self.routing_table = routing_table
self.vector_stores_config = vector_stores_config
async def initialize(self) -> None: async def initialize(self) -> None:
logger.debug("VectorIORouter.initialize") logger.debug("VectorIORouter.initialize")
@ -122,6 +125,17 @@ class VectorIORouter(VectorIO):
embedding_dimension = extra.get("embedding_dimension") embedding_dimension = extra.get("embedding_dimension")
provider_id = extra.get("provider_id") provider_id = extra.get("provider_id")
# Use default embedding model if not specified
if (
embedding_model is None
and self.vector_stores_config
and self.vector_stores_config.default_embedding_model is not None
):
# Construct the full model ID with provider prefix
embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
model_id = self.vector_stores_config.default_embedding_model.model_id
embedding_model = f"{embedding_provider_id}/{model_id}"
if embedding_model is not None and embedding_dimension is None: if embedding_model is not None and embedding_dimension is None:
embedding_dimension = await self._get_embedding_model_dimension(embedding_model) embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
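From the caller's side, the effect of this block is that `embedding_model` (and `provider_id`, handled below) no longer have to ride along on every create request once `vector_stores` defaults are configured. A sketch of both request shapes (the OpenAI-client call and the `faiss` provider id are illustrative assumptions, not taken from this diff):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Explicit per-request selection; the keys mirror the extra.get(...) reads above.
store = client.vector_stores.create(
    name="docs",
    extra_body={
        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
        "provider_id": "faiss",  # assumed provider id
    },
)

# With vector_stores.default_provider_id and default_embedding_model set in the
# run config, the same call can omit extra_body entirely.
store = client.vector_stores.create(name="docs")
```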
@ -132,11 +146,24 @@ class VectorIORouter(VectorIO):
raise ValueError("No vector_io providers available") raise ValueError("No vector_io providers available")
if num_providers > 1: if num_providers > 1:
available_providers = list(self.routing_table.impls_by_provider_id.keys()) available_providers = list(self.routing_table.impls_by_provider_id.keys())
raise ValueError( # Use default configured provider
f"Multiple vector_io providers available. Please specify provider_id in extra_body. " if self.vector_stores_config and self.vector_stores_config.default_provider_id:
f"Available providers: {available_providers}" default_provider = self.vector_stores_config.default_provider_id
) if default_provider in available_providers:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] provider_id = default_provider
logger.debug(f"Using configured default vector store provider: {provider_id}")
else:
raise ValueError(
f"Configured default vector store provider '{default_provider}' not found. "
f"Available providers: {available_providers}"
)
else:
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
vector_db_id = f"vs_{uuid.uuid4()}" vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db( registered_vector_db = await self.routing_table.register_vector_db(
@ -243,8 +270,7 @@ class VectorIORouter(VectorIO):
vector_store_id: str, vector_store_id: str,
) -> VectorStoreDeleteResponse: ) -> VectorStoreDeleteResponse:
logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id) return await self.routing_table.openai_delete_vector_store(vector_store_id)
return await provider.openai_delete_vector_store(vector_store_id)
async def openai_search_vector_store( async def openai_search_vector_store(
self, self,

@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable from .toolgroups import ToolGroupsRoutingTable
from .vector_dbs import VectorDBsRoutingTable
def apiname_object(): def apiname_object():
if isinstance(self, ModelsRoutingTable): if isinstance(self, ModelsRoutingTable):
return ("Inference", "model") return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable): elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield") return ("Safety", "shield")
elif isinstance(self, VectorDBsRoutingTable):
return ("VectorIO", "vector_db")
elif isinstance(self, DatasetsRoutingTable): elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset") return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable): elif isinstance(self, ScoringFunctionsRoutingTable):

@ -0,0 +1,323 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
# The VectorDBs protocol is intentionally not imported here, to avoid exposing its public API endpoints
from llama_stack.apis.vector_io.vector_io import (
OpenAICreateVectorStoreRequestWithExtraBody,
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorDBWithOwner,
)
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl, lookup_model
logger = get_logger(name=__name__, category="core::routing_tables")
class VectorDBsRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_db operations.
Does not inherit from VectorDBs to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""
# Internal methods only - no public API exposure
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_db_id: str | None = None,
vector_db_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
provider_id = list(self.impls_by_provider_id.keys())[0]
if len(self.impls_by_provider_id) > 1:
logger.warning(
f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
)
else:
raise ValueError("No provider available. Please configure a vector_io provider.")
model = await lookup_model(self, embedding_model)
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
try:
provider = self.impls_by_provider_id[provider_id]
except KeyError:
available_providers = list(self.impls_by_provider_id.keys())
raise ValueError(
f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
) from None
logger.warning(
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
)
request = OpenAICreateVectorStoreRequestWithExtraBody(
name=vector_db_name or vector_db_id,
embedding_model=embedding_model,
embedding_dimension=model.metadata["embedding_dimension"],
provider_id=provider_id,
provider_vector_db_id=provider_vector_db_id,
)
vector_store = await provider.openai_create_vector_store(request)
vector_store_id = vector_store.id
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
logger.warning(
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
)
vector_db_data = {
"identifier": vector_store_id,
"type": ResourceType.vector_db.value,
"provider_id": provider_id,
"provider_resource_id": actual_provider_vector_db_id,
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
"vector_db_name": vector_store.name,
}
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)
async def openai_update_vector_store(
self,
vector_store_id: str,
name: str | None = None,
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
name=name,
expires_after=expires_after,
metadata=metadata,
)
async def openai_delete_vector_store(
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_db(vector_store_id)
return result
async def unregister_vector_db(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
if vector_db_obj:
await self.unregister_object(vector_db_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
async def openai_search_vector_store(
self,
vector_store_id: str,
query: str | list[str],
filters: dict[str, Any] | None = None,
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
query=query,
filters=filters,
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
search_mode=search_mode,
)
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
limit=limit,
order=order,
after=after,
before=before,
filter=filter,
)
async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_update_vector_store_file(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
)
async def openai_delete_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
file_ids: list[str],
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
file_ids=file_ids,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
async def openai_list_files_in_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
after: str | None = None,
before: str | None = None,
filter: str | None = None,
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
after=after,
before=before,
filter=filter,
limit=limit,
order=order,
)
async def openai_cancel_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)

@ -72,13 +72,30 @@ class AuthProvider(ABC):
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]: def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {} attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items(): for claim_key, attribute_key in mapping.items():
if claim_key not in claims: # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
# Then fall back to literal key with dots (e.g., "my.dotted.key")
claim: object = claims
keys = claim_key.split(".")
for key in keys:
if isinstance(claim, dict) and key in claim:
claim = claim[key]
else:
claim = None
break
if claim is None and claim_key in claims:
# Fall back to checking if claim_key exists as a literal key
claim = claims[claim_key]
if claim is None:
continue continue
claim = claims[claim_key]
if isinstance(claim, list): if isinstance(claim, list):
values = claim values = claim
else: elif isinstance(claim, str):
values = claim.split() values = claim.split()
else:
continue
if attribute_key in attributes: if attribute_key in attributes:
attributes[attribute_key].extend(values) attributes[attribute_key].extend(values)
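A worked example of the lookup order above: dot notation traverses nested claims first, and only if that fails is the dotted string tried as a literal key. A minimal sketch (claim values are made up; the import path is assumed):

```python
from llama_stack.core.server.auth_providers import (  # module path assumed
    get_attributes_from_claims,
)

claims = {
    "sub": "user-123",
    "resource_access": {"llamastack": {"roles": ["admin", "writer"]}},
    "my.dotted.key": "teamA teamB",
}
mapping = {
    "resource_access.llamastack.roles": "roles",  # nested traversal
    "my.dotted.key": "teams",  # literal dotted key, via the fallback
    "sub": "username",  # plain string claims are whitespace-split
}

print(get_attributes_from_claims(claims, mapping))
# {'roles': ['admin', 'writer'], 'teams': ['teamA', 'teamB'], 'username': ['user-123']}
```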

@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta
from starlette.types import ASGIApp, Receive, Scope, Send from starlette.types import ASGIApp, Receive, Scope, Send
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
logger = get_logger(name=__name__, category="core::server") logger = get_logger(name=__name__, category="core::server")
@ -33,7 +33,7 @@ class QuotaMiddleware:
def __init__( def __init__(
self, self,
app: ASGIApp, app: ASGIApp,
kv_config: KVStoreConfig, kv_config: KVStoreReference,
anonymous_max_requests: int, anonymous_max_requests: int,
authenticated_max_requests: int, authenticated_max_requests: int,
window_seconds: int = 86400, window_seconds: int = 86400,
@ -45,15 +45,15 @@ class QuotaMiddleware:
self.authenticated_max_requests = authenticated_max_requests self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds self.window_seconds = window_seconds
if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
async def _get_kv(self) -> KVStore: async def _get_kv(self) -> KVStore:
if self.kv is None: if self.kv is None:
self.kv = await kvstore_impl(self.kv_config) self.kv = await kvstore_impl(self.kv_config)
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
return self.kv return self.kv
async def __call__(self, scope: Scope, receive: Receive, send: Send): async def __call__(self, scope: Scope, receive: Receive, send: Send):
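Wiring-wise, the middleware now receives a `KVStoreReference` into one of the registered backends instead of an inline `KVStoreConfig`, and the SQLite TTL warning fires lazily when the store is first resolved. A minimal construction sketch (the `QuotaMiddleware` import path and the `quota` namespace are assumptions):

```python
from llama_stack.core.server.quota import QuotaMiddleware  # path assumed
from llama_stack.core.storage.datatypes import KVStoreReference


async def app(scope, receive, send):  # placeholder ASGI app
    ...


quota_app = QuotaMiddleware(
    app,
    kv_config=KVStoreReference(backend="kv_default", namespace="quota"),  # namespace assumed
    anonymous_max_requests=100,
    authenticated_max_requests=1000,
    window_seconds=86400,
)
```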

@ -35,13 +35,23 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageBackendConfig,
StorageConfig,
)
from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -98,30 +108,6 @@ REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None TEST_RECORDING_CONTEXT = None
async def validate_default_embedding_model(impls: dict[Api, Any]):
"""Validate that at most one embedding model is marked as default."""
if Api.models not in impls:
return
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
default_embedding_models = []
for model in models_list:
if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
default_embedding_models.append(model.identifier)
if len(default_embedding_models) > 1:
raise ValueError(
f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
"Only one embedding model can be marked as default."
)
if default_embedding_models:
logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
for rsrc, api, register_method, list_method in RESOURCES: for rsrc, api, register_method, list_method in RESOURCES:
objects = getattr(run_config, rsrc) objects = getattr(run_config, rsrc)
@ -152,7 +138,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
) )
await validate_default_embedding_model(impls)
async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
"""Validate vector stores configuration."""
if vector_stores_config is None:
return
default_embedding_model = vector_stores_config.default_embedding_model
if default_embedding_model is None:
return
provider_id = default_embedding_model.provider_id
model_id = default_embedding_model.model_id
default_model_id = f"{provider_id}/{model_id}"
if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
default_model = models_list.get(default_model_id)
if default_model is None:
raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
class EnvVarError(Exception): class EnvVarError(Exception):
@ -329,6 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
impls[Api.conversations] = conversations_impl impls[Api.conversations] = conversations_impl
def _initialize_storage(run_config: StackRunConfig):
kv_backends: dict[str, StorageBackendConfig] = {}
sql_backends: dict[str, StorageBackendConfig] = {}
for backend_name, backend_config in run_config.storage.backends.items():
        backend_type = backend_config.type.value
        if backend_type.startswith("kv_"):
            kv_backends[backend_name] = backend_config
        elif backend_type.startswith("sql_"):
            sql_backends[backend_name] = backend_config
        else:
            raise ValueError(f"Unknown storage backend type: {backend_type}")
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
register_kvstore_backends(kv_backends)
register_sqlstore_backends(sql_backends)
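A minimal sketch of the kv_/sql_ split above, using the backend configs this commit adds (the import path is assumed from the registry change further down):

from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig

backends = {
    "kv_default": SqliteKVStoreConfig(db_path="/tmp/demo/kvstore.db"),
    "sql_default": SqliteSqlStoreConfig(db_path="/tmp/demo/sql_store.db"),
}
# Mirrors the prefix dispatch in _initialize_storage.
kv = {name: cfg for name, cfg in backends.items() if cfg.type.value.startswith("kv_")}
sql = {name: cfg for name, cfg in backends.items() if cfg.type.value.startswith("sql_")}
assert set(kv) == {"kv_default"} and set(sql) == {"sql_default"}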
class Stack: class Stack:
def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
self.run_config = run_config self.run_config = run_config
@ -347,7 +386,11 @@ class Stack:
TEST_RECORDING_CONTEXT.__enter__() TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) _initialize_storage(self.run_config)
stores = self.run_config.storage.stores
if not stores.metadata:
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
internal_impls = {} internal_impls = {}
@ -367,8 +410,8 @@ class Stack:
await impls[Api.conversations].initialize() await impls[Api.conversations].initialize()
await register_resources(self.run_config, impls) await register_resources(self.run_config, impls)
await refresh_registry_once(impls) await refresh_registry_once(impls)
await validate_vector_stores_config(self.run_config.vector_stores, impls)
self.impls = impls self.impls = impls
def create_registry_refresh_task(self): def create_registry_refresh_task(self):
@ -488,5 +531,16 @@ def run_config_from_adhoc_config_spec(
image_name="distro-test", image_name="distro-test",
apis=list(provider_configs_by_api.keys()), apis=list(provider_configs_by_api.keys()),
providers=provider_configs_by_api, providers=provider_configs_by_api,
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
"sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
) )
return config return config

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal
from pydantic import BaseModel, Field, field_validator
class StorageBackendType(StrEnum):
KV_REDIS = "kv_redis"
KV_SQLITE = "kv_sqlite"
KV_POSTGRES = "kv_postgres"
KV_MONGODB = "kv_mongodb"
SQL_SQLITE = "sql_sqlite"
SQL_POSTGRES = "sql_postgres"
class CommonConfig(BaseModel):
namespace: str | None = Field(
default=None,
description="All keys will be prefixed with this namespace",
)
class RedisKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
host: str = "localhost"
port: int = 6379
@property
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": StorageBackendType.KV_REDIS.value,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
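A quick sanity check of the url property, as a sketch with a hypothetical host and port:

cfg = RedisKVStoreConfig(host="cache.internal", port=6380)
assert cfg.url == "redis://cache.internal:6380"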
class SqliteKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
db_path: str = Field(
description="File path for the sqlite database",
)
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": StorageBackendType.KV_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
class PostgresKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
ssl_mode: str | None = None
ca_cert_path: str | None = None
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": StorageBackendType.KV_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
}
    @field_validator("table_name")
    @classmethod
def validate_table_name(cls, v: str) -> str:
# PostgreSQL identifiers rules:
# - Must start with a letter or underscore
# - Can contain letters, numbers, and underscores
# - Maximum length is 63 bytes
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
if not re.match(pattern, v):
raise ValueError(
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
)
if len(v) > 63:
raise ValueError("Table name must be less than 63 characters")
return v
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
class MongoDBKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": StorageBackendType.KV_MONGODB.value,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
}
class SqlAlchemySqlStoreConfig(BaseModel):
@property
@abstractmethod
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
db_path: str = Field(
description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
)
@property
def engine_str(self) -> str:
return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return {
"type": StorageBackendType.SQL_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
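A sketch of the resulting engine string; expanduser() resolves the path to an absolute one, so the sqlite URL carries four slashes:

cfg = SqliteSqlStoreConfig(db_path="/tmp/demo/sql_store.db")
assert cfg.engine_str == "sqlite+aiosqlite:////tmp/demo/sql_store.db"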
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
@property
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return {
"type": StorageBackendType.SQL_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
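And the asyncpg counterpart, built from the field defaults declared above:

cfg = PostgresSqlStoreConfig(user="llamastack", password="llamastack")
assert cfg.engine_str == "postgresql+asyncpg://llamastack:llamastack@localhost:5432/llamastack"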
# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
"""A reference to a 'SQL-like' persistent store. A table name must be provided."""
table_name: str = Field(
description="Name of the table to use for the SqlStore",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
"""A reference to a 'key-value' persistent store. A namespace must be provided."""
namespace: str = Field(
description="Key prefix for KVStore backends",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
StorageBackendConfig = Annotated[
RedisKVStoreConfig
| SqliteKVStoreConfig
| PostgresKVStoreConfig
| MongoDBKVStoreConfig
| SqliteSqlStoreConfig
| PostgresSqlStoreConfig,
Field(discriminator="type"),
]
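Because the union is discriminated on type, a plain dict loaded from YAML resolves to the right model — a sketch using pydantic's TypeAdapter:

from pydantic import TypeAdapter

adapter = TypeAdapter(StorageBackendConfig)
backend = adapter.validate_python({"type": "kv_sqlite", "db_path": "/tmp/demo/kvstore.db"})
assert isinstance(backend, SqliteKVStoreConfig)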
class InferenceStoreReference(SqlStoreReference):
"""Inference store configuration with queue tuning."""
max_write_queue_size: int = Field(
default=10000,
description="Max queued writes for inference store",
)
num_writers: int = Field(
default=4,
description="Number of concurrent background writers",
)
class ResponsesStoreReference(InferenceStoreReference):
"""Responses store configuration with queue tuning."""
class ServerStoresConfig(BaseModel):
metadata: KVStoreReference | None = Field(
default=None,
description="Metadata store configuration (uses KV backend)",
)
inference: InferenceStoreReference | None = Field(
default=None,
description="Inference store configuration (uses SQL backend)",
)
conversations: SqlStoreReference | None = Field(
default=None,
description="Conversations store configuration (uses SQL backend)",
)
responses: ResponsesStoreReference | None = Field(
default=None,
description="Responses store configuration (uses SQL backend)",
)
class StorageConfig(BaseModel):
    backends: dict[str, StorageBackendConfig] = Field(
        description="Named backend configurations (e.g., 'kv_default', 'sql_default')",
    )
    stores: ServerStoresConfig = Field(
        default_factory=ServerStoresConfig,
        description="Named references to storage backends used by the stack core",
    )
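End to end, a storage: mapping shaped like the regenerated run.yaml files below validates into these models — a sketch with a hypothetical sqlite layout:

raw = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "/tmp/demo/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "/tmp/demo/sql_store.db"},
    },
    "stores": {
        "metadata": {"backend": "kv_default", "namespace": "registry"},
        "inference": {"backend": "sql_default", "table_name": "inference_store"},
        "conversations": {"backend": "sql_default", "table_name": "openai_conversations"},
    },
}
storage = StorageConfig.model_validate(raw)
assert storage.stores.inference.max_write_queue_size == 10000  # queue-tuning default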

@ -11,10 +11,9 @@ from typing import Protocol
import pydantic import pydantic
from llama_stack.core.datatypes import RoutableObjectWithProvider from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core::registry") logger = get_logger(__name__, category="core::registry")
@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
async def create_dist_registry( async def create_dist_registry(
metadata_store: KVStoreConfig | None, metadata_store: KVStoreReference, image_name: str
image_name: str,
) -> tuple[CachedDiskDistributionRegistry, KVStore]: ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
# instantiate kvstore for storing and retrieving distribution metadata # instantiate kvstore for storing and retrieving distribution metadata
if metadata_store: dist_kvstore = await kvstore_impl(metadata_store)
dist_kvstore = await kvstore_impl(metadata_store)
else:
dist_kvstore = await kvstore_impl(
SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
)
dist_registry = CachedDiskDistributionRegistry(dist_kvstore) dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
await dist_registry.initialize() await dist_registry.initialize()
return dist_registry, dist_kvstore return dist_registry, dist_kvstore
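A usage sketch, assuming _initialize_storage has already registered a backend named kv_default:

import asyncio

from llama_stack.core.storage.datatypes import KVStoreReference

async def main() -> None:
    ref = KVStoreReference(backend="kv_default", namespace="registry")
    dist_registry, dist_kvstore = await create_dist_registry(ref, image_name="starter")
    # dist_registry is initialized and backed by the named KV backend;
    # dist_kvstore is the raw handle for callers that need direct access.

asyncio.run(main())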

@ -25,6 +25,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@ -93,30 +93,30 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db backend: kv_default
- provider_id: sqlite-vec - provider_id: sqlite-vec
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus} - provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus provider_type: inline::milvus
config: config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
kvstore: persistence:
type: sqlite namespace: vector_io::milvus
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb} - provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files: files:
- provider_id: meta-reference-files - provider_id: meta-reference-files
provider_type: inline::localfs provider_type: inline::localfs
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db backend: sql_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training: post_training:
- provider_id: torchtune-cpu - provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu provider_type: inline::torchtune-cpu
@ -163,21 +181,21 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -207,17 +225,28 @@ providers:
provider_type: inline::reference provider_type: inline::reference
config: config:
kvstore: kvstore:
type: sqlite namespace: batches
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db backend: kv_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: shields:
- shield_id: llama-guard - shield_id: llama-guard
@ -239,3 +268,8 @@ server:
port: 8321 port: 8321
telemetry: telemetry:
enabled: true enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
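Each provider-level persistence block in this file carries the same two keys as a KVStoreReference; a sketch validating one of them, assuming the provider configs reuse that model:

from llama_stack.core.storage.datatypes import KVStoreReference

ref = KVStoreReference.model_validate({"namespace": "vector_io::faiss", "backend": "kv_default"})
assert ref.backend == "kv_default" and ref.namespace == "vector_io::faiss"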

@ -26,9 +26,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -38,32 +38,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -86,15 +89,26 @@ providers:
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -34,32 +34,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -82,15 +85,26 @@ providers:
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -37,9 +37,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -49,32 +49,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -99,15 +102,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -27,9 +27,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -39,32 +39,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -89,15 +92,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -28,9 +28,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -41,12 +41,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -65,8 +68,8 @@ providers:
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db backend: kv_default
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
@ -86,17 +89,28 @@ providers:
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db backend: sql_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -23,9 +23,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -36,12 +36,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -75,17 +78,28 @@ providers:
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db backend: sql_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: [] shields: []
vector_dbs: [] vector_dbs: []

@ -39,16 +39,16 @@ providers:
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -57,9 +57,9 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -69,32 +69,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -119,15 +122,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: gpt-4o model_id: gpt-4o

@ -91,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 768, "embedding_dimension": 768,
}, },
) )
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate( return DistributionTemplate(
name=name, name=name,
distro_type="self_hosted", distro_type="self_hosted",
@ -105,22 +104,16 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={ provider_overrides={
"inference": inference_providers + [embedding_provider], "inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers, "vector_io": vector_io_providers,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
}, },
default_models=default_models + [embedding_model], default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.sample_run_config(), storage_backends={
inference_store=postgres_config, "kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
), ),
}, },
run_config_env_vars={ run_config_env_vars={

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -34,20 +34,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: postgres agent_state:
host: ${env.POSTGRES_HOST:=localhost} namespace: agents
port: ${env.POSTGRES_PORT:=5432} backend: kv_default
db: ${env.POSTGRES_DB:=llamastack} responses:
user: ${env.POSTGRES_USER:=llamastack} table_name: responses
password: ${env.POSTGRES_PASSWORD:=llamastack} backend: sql_default
responses_store: max_write_queue_size: 10000
type: postgres num_writers: 4
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
@ -63,24 +58,35 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: postgres backends:
host: ${env.POSTGRES_HOST:=localhost} kv_default:
port: ${env.POSTGRES_PORT:=5432} type: kv_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} user: ${env.POSTGRES_USER:=llamastack}
inference_store: password: ${env.POSTGRES_PASSWORD:=llamastack}
type: postgres table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
host: ${env.POSTGRES_HOST:=localhost} sql_default:
port: ${env.POSTGRES_PORT:=5432} type: sql_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
conversations_store: user: ${env.POSTGRES_USER:=llamastack}
type: sqlite password: ${env.POSTGRES_PASSWORD:=llamastack}
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@ -93,30 +93,30 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db backend: kv_default
- provider_id: sqlite-vec - provider_id: sqlite-vec
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus} - provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus provider_type: inline::milvus
config: config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
kvstore: persistence:
type: sqlite namespace: vector_io::milvus
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb} - provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files: files:
- provider_id: meta-reference-files - provider_id: meta-reference-files
provider_type: inline::localfs provider_type: inline::localfs
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db backend: sql_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training: post_training:
- provider_id: huggingface-gpu - provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu provider_type: inline::huggingface-gpu
@ -166,21 +184,21 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -210,17 +228,28 @@ providers:
provider_type: inline::reference provider_type: inline::reference
config: config:
kvstore: kvstore:
type: sqlite namespace: batches
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db backend: kv_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: shields:
- shield_id: llama-guard - shield_id: llama-guard
@ -242,3 +271,8 @@ server:
port: 8321 port: 8321
telemetry: telemetry:
enabled: true enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@@ -93,30 +93,30 @@ providers:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
       db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -125,17 +125,32 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -147,12 +162,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
    config:
-      persistence_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   post_training:
   - provider_id: torchtune-cpu
     provider_type: inline::torchtune-cpu
@@ -163,21 +181,21 @@ providers:
     provider_type: inline::meta-reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
+        namespace: eval
+        backend: kv_default
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
+        namespace: datasetio::huggingface
+        backend: kv_default
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
+        namespace: datasetio::localfs
+        backend: kv_default
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -207,17 +225,28 @@ providers:
     provider_type: inline::reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
-inference_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db
-conversations_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields:
 - shield_id: llama-guard
@@ -239,3 +268,8 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
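
Throughout this run.yaml, optional providers are toggled with shell-style substitution: `${env.VAR:=default}` expands to the variable or a default, while `${env.VAR:+value}` expands to `value` only when the variable is set, so `${env.QDRANT_URL:+qdrant}` yields an empty provider_id (disabling the provider) unless QDRANT_URL is defined. A rough standalone sketch of the two operators; llama-stack's actual resolver is not part of this diff:

```python
import os
import re

# Matches ${env.NAME:=default} and ${env.NAME:+value}
_PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

def expand(value: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, arg = match.group(1), match.group(2), match.group(3)
        current = os.environ.get(var)
        if op == "=":
            # ':=' falls back to the default when the variable is unset or empty
            return current if current else arg
        # ':+' yields the literal only when the variable is set
        return arg if current else ""
    return _PATTERN.sub(repl, value)

print(expand("${env.QDRANT_URL:+qdrant}"))  # "" while QDRANT_URL is unset
os.environ["QDRANT_URL"] = "http://localhost:6333"
print(expand("${env.QDRANT_URL:+qdrant}"))  # "qdrant"
```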

View file

@@ -11,8 +11,10 @@ from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
     ProviderSpec,
+    QualifiedModel,
     ShieldInput,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
@@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
+from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
+from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             BuildProvider(provider_type="inline::milvus"),
             BuildProvider(provider_type="remote::chromadb"),
             BuildProvider(provider_type="remote::pgvector"),
+            BuildProvider(provider_type="remote::qdrant"),
+            BuildProvider(provider_type="remote::weaviate"),
         ],
         "files": [BuildProvider(provider_type="inline::localfs")],
         "safety": [
@@ -221,12 +227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                         password="${env.PGVECTOR_PASSWORD:=}",
                     ),
                 ),
+                Provider(
+                    provider_id="${env.QDRANT_URL:+qdrant}",
+                    provider_type="remote::qdrant",
+                    config=QdrantVectorIOConfig.sample_run_config(
+                        f"~/.llama/distributions/{name}",
+                        url="${env.QDRANT_URL:=}",
+                    ),
+                ),
+                Provider(
+                    provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                    provider_type="remote::weaviate",
+                    config=WeaviateVectorIOConfig.sample_run_config(
+                        f"~/.llama/distributions/{name}",
+                        cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                    ),
+                ),
             ],
             "files": [files_provider],
         },
         default_models=[],
         default_tool_groups=default_tool_groups,
         default_shields=default_shields,
+        vector_stores_config=VectorStoresConfig(
+            default_provider_id="faiss",
+            default_embedding_model=QualifiedModel(
+                provider_id="sentence-transformers",
+                model_id="nomic-ai/nomic-embed-text-v1.5",
+            ),
+        ),
     ),
 },
 run_config_env_vars={

View file

@@ -27,8 +27,15 @@ from llama_stack.core.datatypes import (
     ShieldInput,
     TelemetryConfig,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    SqlStoreReference,
+    StorageBackendType,
+)
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
@@ -180,10 +187,10 @@ class RunConfigSettings(BaseModel):
     default_tool_groups: list[ToolGroupInput] | None = None
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
-    metadata_store: dict | None = None
-    inference_store: dict | None = None
-    conversations_store: dict | None = None
+    vector_stores_config: VectorStoresConfig | None = None
     telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
+    storage_backends: dict[str, Any] | None = None
+    storage_stores: dict[str, Any] | None = None

     def run_config(
         self,
@@ -226,28 +233,45 @@ class RunConfigSettings(BaseModel):
         # Get unique set of APIs from providers
         apis = sorted(providers.keys())

+        storage_backends = self.storage_backends or {
+            "kv_default": SqliteKVStoreConfig.sample_run_config(
+                __distro_dir__=f"~/.llama/distributions/{name}",
+                db_name="kvstore.db",
+            ),
+            "sql_default": SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=f"~/.llama/distributions/{name}",
+                db_name="sql_store.db",
+            ),
+        }
+        storage_stores = self.storage_stores or {
+            "metadata": KVStoreReference(
+                backend="kv_default",
+                namespace="registry",
+            ).model_dump(exclude_none=True),
+            "inference": InferenceStoreReference(
+                backend="sql_default",
+                table_name="inference_store",
+            ).model_dump(exclude_none=True),
+            "conversations": SqlStoreReference(
+                backend="sql_default",
+                table_name="openai_conversations",
+            ).model_dump(exclude_none=True),
+        }
+        storage_config = dict(
+            backends=storage_backends,
+            stores=storage_stores,
+        )
+
         # Return a dict that matches StackRunConfig structure
-        return {
+        config = {
             "version": LLAMA_STACK_RUN_CONFIG_VERSION,
             "image_name": name,
             "container_image": container_image,
             "apis": apis,
             "providers": provider_configs,
-            "metadata_store": self.metadata_store
-            or SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="registry.db",
-            ),
-            "inference_store": self.inference_store
-            or SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="inference_store.db",
-            ),
-            "conversations_store": self.conversations_store
-            or SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="conversations.db",
-            ),
+            "storage": storage_config,
             "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
             "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
             "vector_dbs": [],
@@ -261,6 +285,11 @@ class RunConfigSettings(BaseModel):
             "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }

+        if self.vector_stores_config:
+            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
+
+        return config
+

 class DistributionTemplate(BaseModel):
     """
@@ -297,11 +326,15 @@ class DistributionTemplate(BaseModel):
         # We should have a better way to do this by formalizing the concept of "internal" APIs
         # and providers, with a way to specify dependencies for them.
-        if run_config_.get("inference_store"):
-            additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
-
-        if run_config_.get("metadata_store"):
-            additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
+        storage_cfg = run_config_.get("storage", {})
+        for backend_cfg in storage_cfg.get("backends", {}).values():
+            store_type = backend_cfg.get("type")
+            if not store_type:
+                continue
+            if str(store_type).startswith("kv_"):
+                additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
+            elif str(store_type).startswith("sql_"):
+                additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))

         if self.additional_pip_packages:
             additional_pip_packages.extend(self.additional_pip_packages)
@@ -387,11 +420,13 @@ class DistributionTemplate(BaseModel):
         def enum_representer(dumper, data):
             return dumper.represent_scalar("tag:yaml.org,2002:str", data.value)

-        # Register YAML representer for ModelType
+        # Register YAML representer for enums
         yaml.add_representer(ModelType, enum_representer)
         yaml.add_representer(DatasetPurpose, enum_representer)
+        yaml.add_representer(StorageBackendType, enum_representer)
         yaml.SafeDumper.add_representer(ModelType, enum_representer)
         yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
+        yaml.SafeDumper.add_representer(StorageBackendType, enum_representer)

         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)
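
The reworked dependency scan above derives extra pip packages from backend types rather than from the old per-store configs. A toy illustration of the same loop; the two `get_*_pip_packages` helpers are real functions referenced in the diff, but their bodies below are invented stand-ins for the example:

```python
# Stand-in package mappings; the real helpers live elsewhere in llama-stack.
def get_kv_pip_packages(cfg: dict) -> list[str]:
    return ["redis"] if cfg.get("type") == "kv_redis" else []

def get_sql_pip_packages(cfg: dict) -> list[str]:
    return ["asyncpg"] if cfg.get("type") == "sql_postgres" else []

backends = {
    "kv_default": {"type": "kv_sqlite"},
    "sql_default": {"type": "sql_postgres"},
}

additional_pip_packages: list[str] = []
for backend_cfg in backends.values():
    store_type = str(backend_cfg.get("type", ""))
    if store_type.startswith("kv_"):
        additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
    elif store_type.startswith("sql_"):
        additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))

print(additional_pip_packages)  # ['asyncpg']: only the Postgres backend adds a dep
```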

View file

@@ -22,9 +22,9 @@ providers:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -34,32 +34,35 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
+        namespace: eval
+        backend: kv_default
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
+        namespace: datasetio::huggingface
+        backend: kv_default
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
+        namespace: datasetio::localfs
+        backend: kv_default
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -90,17 +93,28 @@ providers:
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
-inference_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
-conversations_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields: []
 vector_dbs: []

View file

@@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents):
         self.policy = policy

     async def initialize(self) -> None:
-        self.persistence_store = await kvstore_impl(self.config.persistence_store)
-        self.responses_store = ResponsesStore(self.config.responses_store, self.policy)
+        self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
+        self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy)
         await self.responses_store.initialize()
         self.openai_responses_impl = OpenAIResponsesImpl(
             inference_api=self.inference_api,

View file

@@ -8,24 +8,30 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore import KVStoreConfig
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+
+
+class AgentPersistenceConfig(BaseModel):
+    """Nested persistence configuration for agents."""
+
+    agent_state: KVStoreReference
+    responses: ResponsesStoreReference


 class MetaReferenceAgentsImplConfig(BaseModel):
-    persistence_store: KVStoreConfig
-    responses_store: SqlStoreConfig
+    persistence: AgentPersistenceConfig

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "persistence_store": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="agents_store.db",
-            ),
-            "responses_store": SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="responses_store.db",
-            ),
+            "persistence": {
+                "agent_state": KVStoreReference(
+                    backend="kv_default",
+                    namespace="agents",
+                ).model_dump(exclude_none=True),
+                "responses": ResponsesStoreReference(
+                    backend="sql_default",
+                    table_name="responses",
+                ).model_dump(exclude_none=True),
+            }
         }
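
To make the nested shape concrete, here is a self-contained sketch that validates the same dict `sample_run_config` emits. The two reference models are stand-ins mirroring the fields used in this diff; the real classes live in `llama_stack.core.storage.datatypes`:

```python
from pydantic import BaseModel

# Stand-ins for the real reference types (fields mirror the diff above).
class KVStoreReference(BaseModel):
    backend: str
    namespace: str

class ResponsesStoreReference(BaseModel):
    backend: str
    table_name: str

class AgentPersistenceConfig(BaseModel):
    agent_state: KVStoreReference
    responses: ResponsesStoreReference

cfg = AgentPersistenceConfig.model_validate(
    {
        "agent_state": {"backend": "kv_default", "namespace": "agents"},
        "responses": {"backend": "sql_default", "table_name": "responses"},
    }
)
print(cfg.agent_state.namespace, cfg.responses.table_name)  # agents responses
```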

View file

@@ -359,6 +359,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )

         # Stream the response

View file

@@ -110,6 +110,7 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
+        instructions: str,
         safety_api,
         guardrail_ids: list[str] | None = None,
     ):
@@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:

View file

@@ -6,13 +6,13 @@

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference


 class ReferenceBatchesImplConfig(BaseModel):
     """Configuration for the Reference Batches implementation."""

-    kvstore: KVStoreConfig = Field(
+    kvstore: KVStoreReference = Field(
         description="Configuration for the key-value store backend.",
     )
@@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="batches.db",
-            ),
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="batches",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class LocalFSDatasetIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="localfs_datasetio.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="datasetio::localfs",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class MetaReferenceEvalConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="meta_reference_eval.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="eval",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -8,14 +8,14 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
+from llama_stack.core.storage.datatypes import SqlStoreReference


 class LocalfsFilesImplConfig(BaseModel):
     storage_dir: str = Field(
         description="Directory to store uploaded files",
     )
-    metadata_store: SqlStoreConfig = Field(
+    metadata_store: SqlStoreReference = Field(
         description="SQL store configuration for file metadata",
     )
     ttl_secs: int = 365 * 24 * 60 * 60  # 1 year
@@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel):
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
-            "metadata_store": SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="files_metadata.db",
-            ),
+            "metadata_store": SqlStoreReference(
+                backend="sql_default",
+                table_name="files_metadata",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl(
                 provider_id=self.__provider_id__,
                 metadata={
                     "embedding_dimension": 768,
-                    "default_configured": True,
                 },
                 model_type=ModelType.embedding,
             ),

View file

@@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig

 async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
-    from llama_stack.providers.remote.vector_io.chroma.chroma import (
-        ChromaVectorIOAdapter,
-    )
+    from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter

-    impl = ChromaVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,14 +8,14 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class ChromaVectorIOConfig(BaseModel):
     db_path: str
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend")
+    persistence: KVStoreReference = Field(description="Config for KV store backend")

     @classmethod
     def sample_run_config(
@@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel):
     ) -> dict[str, Any]:
         return {
             "db_path": db_path,
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="chroma_inline_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::chroma",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"

-    impl = FaissVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,22 +8,19 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class FaissVectorIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    persistence: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="faiss_store.db",
-            )
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::faiss",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -17,27 +17,14 @@ from numpy.typing import NDArray

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-)
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-    VectorDBsProtocolPrivate,
-)
+from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    EmbeddingIndex,
-    VectorDBWithIndex,
-)
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex

 from .config import FaissVectorIOConfig
@@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex):
         await self._save_index()

-    async def query_vector(
-        self,
-        embedding: NDArray,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
         chunks = []
         scores = []
@@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex):
         return QueryChunksResponse(chunks=chunks, scores=scores)

-    async def query_keyword(
-        self,
-        query_string: str,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         raise NotImplementedError(
             "Keyword search is not supported - underlying DB FAISS does not support this search mode"
         )
@@ -200,21 +177,14 @@ class FaissIndex(EmbeddingIndex):

 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
-    def __init__(
-        self,
-        config: FaissVectorIOConfig,
-        inference_api: Inference,
-        models_api: Models,
-        files_api: Files | None,
-    ) -> None:
+    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.models_api = models_api
         self.cache: dict[str, VectorDBWithIndex] = {}

     async def initialize(self) -> None:
-        self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.kvstore = await kvstore_impl(self.config.persistence)
         # Load existing banks from kvstore
         start_key = VECTOR_DBS_PREFIX
         end_key = f"{VECTOR_DBS_PREFIX}\xff"
@@ -252,17 +222,11 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

-    async def register_vector_db(
-        self,
-        vector_db: VectorDB,
-    ) -> None:
+    async def register_vector_db(self, vector_db: VectorDB) -> None:
         assert self.kvstore is not None
         key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
-        await self.kvstore.set(
-            key=key,
-            value=vector_db.model_dump_json(),
-        )
+        await self.kvstore.set(key=key, value=vector_db.model_dump_json())

         # Store in cache
         self.cache[vector_db.identifier] = VectorDBWithIndex(
@@ -285,12 +249,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         del self.cache[vector_db_id]
         await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")

-    async def insert_chunks(
-        self,
-        vector_db_id: str,
-        chunks: list[Chunk],
-        ttl_seconds: int | None = None,
-    ) -> None:
+    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
         index = self.cache.get(vector_db_id)
         if index is None:
             raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
@@ -298,10 +257,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         await index.insert_chunks(chunks)

     async def query_chunks(
-        self,
-        vector_db_id: str,
-        query: InterleavedContent,
-        params: dict[str, Any] | None = None,
+        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
         index = self.cache.get(vector_db_id)
         if index is None:
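
The adapter persists each registered vector DB under a shared key prefix and enumerates them with a range scan whose end key appends a `\xff` sentinel, as `initialize()` above shows. A small sketch of that key layout; the concrete value of `VECTOR_DBS_PREFIX` is an assumption, since the constant's definition is not part of this hunk:

```python
VECTOR_DBS_PREFIX = "vector_dbs:"  # assumed value; defined elsewhere in the module

def key_for(identifier: str) -> str:
    return f"{VECTOR_DBS_PREFIX}{identifier}"

# Range-scan bounds as used in initialize(): every key with the prefix sorts
# between the bare prefix and the prefix followed by a high sentinel byte.
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"

key = key_for("my-db")
print(key, start_key <= key < end_key)  # vector_dbs:my-db True
```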

View file

@@ -14,11 +14,6 @@ from .config import MilvusVectorIOConfig

 async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter

-    impl = MilvusVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,25 +8,22 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class MilvusVectorIOConfig(BaseModel):
     db_path: str
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
     consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
             "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="milvus_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::milvus",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter

     assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = QdrantVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -9,23 +9,21 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class QdrantVectorIOConfig(BaseModel):
     path: str
-    kvstore: KVStoreConfig
+    persistence: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__, db_name="qdrant_registry.db"
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::qdrant",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
     from .sqlite_vec import SQLiteVecVectorIOAdapter

     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = SQLiteVecVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,22 +8,19 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class SQLiteVectorIOConfig(BaseModel):
     db_path: str = Field(description="Path to the SQLite database file")
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="sqlite_vec_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::sqlite_vec",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -17,13 +17,8 @@ from numpy.typing import NDArray

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-)
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex):
                     # Insert vector embeddings
                     embedding_data = [
-                        (
-                            (
-                                chunk.chunk_id,
-                                serialize_vector(emb.tolist()),
-                            )
-                        )
+                        ((chunk.chunk_id, serialize_vector(emb.tolist())))
                         for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
                     ]
-                    cur.executemany(
-                        f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);",
-                        embedding_data,
-                    )
+                    cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data)

                     # Insert FTS content
                     fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
                     # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
-                    cur.executemany(
-                        f"DELETE FROM [{self.fts_table}] WHERE id = ?;",
-                        [(row[0],) for row in fts_data],
-                    )
+                    cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data])

                     # INSERT new entries
-                    cur.executemany(
-                        f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);",
-                        fts_data,
-                    )
+                    cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data)

                     connection.commit()
@@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex):
         # Run batch insertion in a background thread
         await asyncio.to_thread(_execute_all_batch_inserts)

-    async def query_vector(
-        self,
-        embedding: NDArray,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         """
         Performs vector-based search using a virtual table for vector similarity.
         """
@@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex):
             scores.append(score)
         return QueryChunksResponse(chunks=chunks, scores=scores)

-    async def query_keyword(
-        self,
-        query_string: str,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         """
         Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search.
         """
@@ -410,22 +381,15 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
     and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
     """

-    def __init__(
-        self,
-        config,
-        inference_api: Inference,
-        models_api: Models,
-        files_api: Files | None,
-    ) -> None:
+    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.models_api = models_api
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.vector_db_store = None

     async def initialize(self) -> None:
-        self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.kvstore = await kvstore_impl(self.config.persistence)

         start_key = VECTOR_DBS_PREFIX
         end_key = f"{VECTOR_DBS_PREFIX}\xff"
@@ -433,9 +397,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         for db_json in stored_vector_dbs:
             vector_db = VectorDB.model_validate_json(db_json)
             index = await SQLiteVecIndex.create(
-                vector_db.embedding_dimension,
-                self.config.db_path,
-                vector_db.identifier,
+                vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
             )
             self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
@@ -450,11 +412,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         return [v.vector_db for v in self.cache.values()]

     async def register_vector_db(self, vector_db: VectorDB) -> None:
-        index = await SQLiteVecIndex.create(
-            vector_db.embedding_dimension,
-            self.config.db_path,
-            vector_db.identifier,
-        )
+        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
         self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)

     async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
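
The batch-insert path above deletes FTS rows by id before re-inserting them because FTS5 virtual tables do not support ON CONFLICT. A standalone sqlite3 sketch of that upsert pattern (the table name is illustrative):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE VIRTUAL TABLE fts_chunks USING fts5(id, content)")

fts_data = [("chunk-1", "hello world"), ("chunk-2", "vector search with sqlite")]

# DELETE existing entries with the same ids, then INSERT fresh rows.
cur.executemany("DELETE FROM fts_chunks WHERE id = ?;", [(row[0],) for row in fts_data])
cur.executemany("INSERT INTO fts_chunks (id, content) VALUES (?, ?);", fts_data)
conn.commit()

# Relevance-ranked full-text query, as query_keyword() performs.
print(cur.execute("SELECT id FROM fts_chunks WHERE fts_chunks MATCH 'vector'").fetchall())
```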

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class HuggingfaceDatasetIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="huggingface_datasetio.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="datasetio::huggingface",
+            ).model_dump(exclude_none=True)
         }

Some files were not shown because too many files have changed in this diff.