Merge 059d880bc0 into sapling-pr-archive-ehhuang

ehhuang 2025-10-20 14:53:48 -07:00 committed by GitHub
commit 6cb148dbe6
199 changed files with 27713 additions and 7978 deletions


@@ -82,11 +82,14 @@ runs:
         echo "No recording changes"
       fi
-  - name: Write inference logs to file
+  - name: Write docker logs to file
     if: ${{ always() }}
     shell: bash
     run: |
       sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+      distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
+      stack_container_name="llama-stack-test-$distro_name"
+      sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true
   - name: Upload logs
     if: ${{ always() }}
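For illustration, the new step's container-name derivation can be exercised on its own. This is a standalone sketch of the same sed pipeline; `docker:ci-tests` is an assumed example value for `inputs.stack-config`:

```bash
# Standalone sketch of the sed pipeline above; the input value is an assumed example.
stack_config="docker:ci-tests"
distro_name=$(echo "$stack_config" | sed 's/^docker://' | sed 's/^server://')
echo "llama-stack-test-$distro_name"   # -> llama-stack-test-ci-tests
```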


@@ -73,6 +73,24 @@ jobs:
           image_name: kube
           apis: []
           providers: {}
+          storage:
+            backends:
+              kv_default:
+                type: kv_sqlite
+                db_path: $run_dir/kvstore.db
+              sql_default:
+                type: sql_sqlite
+                db_path: $run_dir/sql_store.db
+            stores:
+              metadata:
+                namespace: registry
+                backend: kv_default
+              inference:
+                table_name: inference_store
+                backend: sql_default
+              conversations:
+                table_name: openai_conversations
+                backend: sql_default
           server:
             port: 8321
           EOF


@@ -169,9 +169,7 @@ jobs:
         run: |
           uv run --no-sync \
             pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-            tests/integration/vector_io \
-            --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-            --embedding-dimension 768
+            tests/integration/vector_io
       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}


@@ -98,21 +98,30 @@ data:
     - provider_id: model-context-protocol
       provider_type: remote::model-context-protocol
      config: {}
-    metadata_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
-      table_name: llamastack_kvstore
-    inference_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
     models:
     - metadata:
         embedding_dimension: 768

@@ -137,5 +146,4 @@ data:
       port: 8323
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config


@@ -95,21 +95,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768


@@ -0,0 +1,8 @@
These are the source-of-truth configuration files used to generate the client SDKs via Stainless.

- `openapi.yml`: the OpenAPI specification for the Llama Stack API.
- `openapi.stainless.yml`: the Stainless _configuration_, which instructs Stainless how to generate the client SDKs.

A small side note: note the `.yml` suffix, which Stainless typically uses for its configuration files.

These files go hand-in-hand. As of now, only `openapi.yml` is generated automatically, via the `run_openapi_generator.sh` script.
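A minimal regeneration sketch, assuming the generator script lives at the path implied by this commit's changes (adjust to your checkout; the script now also refreshes the Stainless copy of the spec):

```bash
# Assumed script location; adjust to where run_openapi_generator.sh lives in your checkout.
./docs/openapi_generator/run_openapi_generator.sh
# The script's final step (added in this commit) is equivalent to:
#   cp docs/static/stainless-llama-stack-spec.yaml client-sdks/stainless/openapi.yml
```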


@ -0,0 +1,608 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provide a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}

File diff suppressed because it is too large.


@@ -60,6 +60,17 @@ ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}
 # Copy the repository so editable installs and run configurations are available.
 COPY . /workspace

+# Install the client package if it is provided
+# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
+RUN set -eux; \
+    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
+        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
+            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
+            exit 1; \
+        fi; \
+        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
+    fi;
+
 # Install llama-stack
 RUN set -eux; \
     if [ "$INSTALL_MODE" = "editable" ]; then \

@@ -83,16 +94,6 @@ RUN set -eux; \
     fi; \
     fi;

-# Install the client package if it is provided
-RUN set -eux; \
-    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
-        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
-            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
-    fi;
-
 # Install the dependencies for the distribution
 RUN set -eux; \
     if [ -z "$DISTRO_NAME" ]; then \
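As a usage sketch, a local build that exercises the new ordering might look like this. The image tag and client-checkout path are assumptions, and the build args are assumed to be declared as `ARG`s earlier in the Containerfile:

```bash
# Hypothetical local build: editable llama-stack plus a local client checkout.
# LLAMA_STACK_CLIENT_DIR must point inside the build context, since it is
# checked and installed from /workspace after `COPY . /workspace`.
docker build \
  --build-arg INSTALL_MODE=editable \
  --build-arg LLAMA_STACK_CLIENT_DIR=/workspace/llama-stack-client-python \
  -t llama-stack:dev .
```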


@@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:
 To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:

 ```yaml
-models:
-  - model_id: nomic-ai/nomic-embed-text-v1.5
-    provider_id: inline::sentence-transformers
-    metadata:
-      embedding_dimension: 768
-      default_configured: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
 ```

 With this configuration:
-- `client.vector_stores.create()` works without requiring embedding model parameters
-- The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- `client.vector_stores.create()` works without requiring embedding model or provider parameters
+- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available
+- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store
+- The `default_provider_id` specifies which vector storage backend to use
+- The `default_embedding_model` specifies both the inference provider and model for embeddings

 ## Vector Store Operations

@@ -108,14 +109,15 @@ With this configuration:
 You can create vector stores with automatic or explicit embedding model selection:

 ```python
-# Automatic - uses default configured embedding model
+# Automatic - uses default configured embedding model and vector store provider
 vs = client.vector_stores.create()

-# Explicit - specify embedding model when you need a specific one
+# Explicit - specify embedding model and/or provider when you need specific ones
 vs = client.vector_stores.create(
     extra_body={
-        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
-        "embedding_dimension": 768
+        "provider_id": "faiss",  # Optional: specify vector store provider
+        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+        "embedding_dimension": 768  # Optional: will be auto-detected if not provided
     }
 )
 ```
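For reference, the same default-driven create can be exercised over plain HTTP. A minimal sketch, assuming a server on the default port 8321 and that an empty body is accepted when defaults are configured (the route comes from this PR's Stainless config):

```bash
# Create a vector store with no explicit embedding model or provider;
# the server falls back to default_provider_id / default_embedding_model.
curl -s -X POST http://localhost:8321/v1/vector_stores \
  -H 'Content-Type: application/json' \
  -d '{}'
```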


@@ -44,18 +44,32 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
+      persistence:
+        agent_state:
+          backend: kv_default
+          namespace: agents
+        responses:
+          backend: sql_default
+          table_name: responses
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}


@@ -1,56 +1,155 @@
 apiVersion: v1
 data:
-  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
-    inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
-    \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
-    \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
-    ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
-    ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
-    remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
-    \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
-    \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
-    \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
-    \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
-    \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
-    \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
-    \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
-    \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
-    meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
-    ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
-    \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
-    \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
-    \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
-    \ provider_type: inline::meta-reference\n config:\n persistence_store:\n
-    \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
-    ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
-    \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
-    ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
-    \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
-    \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
-    \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
-    3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
-    {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
-    \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host:
-    ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
-    metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id:
-    sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n
-    \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id:
-    ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n
-    \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
-    []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
-    builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
-    \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n
-    \ type: github_token\n"
+  stack_run_config.yaml: |
+    version: '2'
+    image_name: kubernetes-demo
+    apis:
+    - agents
+    - inference
+    - files
+    - safety
+    - telemetry
+    - tool_runtime
+    - vector_io
+    providers:
+      inference:
+      - provider_id: vllm-inference
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: vllm-safety
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: sentence-transformers
+        provider_type: inline::sentence-transformers
+        config: {}
+      vector_io:
+      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+        provider_type: remote::chromadb
+        config:
+          url: ${env.CHROMADB_URL:=}
+          kvstore:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+      safety:
+      - provider_id: llama-guard
+        provider_type: inline::llama-guard
+        config:
+          excluded_categories: []
+      agents:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          persistence_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+          responses_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
+      tool_runtime:
+      - provider_id: brave-search
+        provider_type: remote::brave-search
+        config:
+          api_key: ${env.BRAVE_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: tavily-search
+        provider_type: remote::tavily-search
+        config:
+          api_key: ${env.TAVILY_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: rag-runtime
+        provider_type: inline::rag-runtime
+        config: {}
+      - provider_id: model-context-protocol
+        provider_type: remote::model-context-protocol
+        config: {}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
+    models:
+    - metadata:
+        embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+    - metadata: {}
+      model_id: ${env.INFERENCE_MODEL}
+      provider_id: vllm-inference
+      model_type: llm
+    - metadata: {}
+      model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+      provider_id: vllm-safety
+      model_type: llm
+    shields:
+    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    vector_dbs: []
+    datasets: []
+    scoring_fns: []
+    benchmarks: []
+    tool_groups:
+    - toolgroup_id: builtin::websearch
+      provider_id: tavily-search
+    - toolgroup_id: builtin::rag
+      provider_id: rag-runtime
+    server:
+      port: 8321
+      auth:
+        provider_config:
+          type: github_token
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config


@@ -93,21 +93,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768


@@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
-| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No | | |

 ## Sample Configuration

 ```yaml
-persistence_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
-responses_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
+persistence:
+  agent_state:
+    namespace: agents
+    backend: kv_default
+  responses:
+    table_name: responses
+    backend: sql_default
+    max_write_queue_size: 10000
+    num_writers: 4
 ```


@@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Configuration for the key-value store backend. |
 | `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
 | `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |

@@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence.
 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db
+  namespace: batches
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
+  namespace: datasetio::localfs
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
+  namespace: datasetio::huggingface
+  backend: kv_default
 ```


@@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
+  namespace: eval
+  backend: kv_default
 ```


@@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
 | `ttl_secs` | `<class 'int'>` | No | 31536000 | |

@@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo
 ```yaml
 storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
+  table_name: files_metadata
+  backend: sql_default
 ```


@@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad
 | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
 | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
 | `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
-| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |

 ## Sample Configuration

@@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
 endpoint_url: ${env.S3_ENDPOINT_URL:=}
 auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
 metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db
+  table_name: s3_files_metadata
+  backend: sql_default
 ```


@@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |

 ## Sample Configuration

 ```yaml
 db_path: ${env.CHROMADB_PATH}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db
+persistence:
+  namespace: vector_io::chroma
+  backend: kv_default
 ```


@@ -95,12 +95,12 @@ more details about Faiss in general.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```


@@ -14,14 +14,14 @@ Meta's reference implementation of a vector database.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+persistence:
+  namespace: vector_io::faiss
+  backend: kv_default
 ```

 ## Deprecation Notice


@@ -17,14 +17,14 @@ Please refer to the remote provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
 | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |

 ## Sample Configuration

 ```yaml
 db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
+persistence:
+  namespace: vector_io::milvus
+  backend: kv_default
 ```


@@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `path` | `<class 'str'>` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant
+  backend: kv_default
 ```


@@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```


@@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
+persistence:
+  namespace: vector_io::sqlite_vec
+  backend: kv_default
 ```

 ## Deprecation Notice


@@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |

 ## Sample Configuration

 ```yaml
 url: ${env.CHROMADB_URL}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db
+persistence:
+  namespace: vector_io::chroma_remote
+  backend: kv_default
 ```


@@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
 | `uri` | `<class 'str'>` | No | | The URI of the Milvus server |
 | `token` | `str \| None` | No | | The token of the Milvus server |
 | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
 | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |

 :::note

@@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo
 ```yaml
 uri: ${env.MILVUS_ENDPOINT}
 token: ${env.MILVUS_TOKEN}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db
+persistence:
+  namespace: vector_io::milvus_remote
+  backend: kv_default
 ```


@@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 | `db` | `str \| None` | No | postgres | |
 | `user` | `str \| None` | No | postgres | |
 | `password` | `str \| None` | No | mysecretpassword | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

@@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432}
 db: ${env.PGVECTOR_DB}
 user: ${env.PGVECTOR_USER}
 password: ${env.PGVECTOR_PASSWORD}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db
+persistence:
+  namespace: vector_io::pgvector
+  backend: kv_default
 ```


@@ -26,13 +26,13 @@ Please refer to the inline provider documentation.
 | `prefix` | `str \| None` | No | | |
 | `timeout` | `int \| None` | No | | |
 | `host` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 api_key: ${env.QDRANT_API_KEY:=}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant_remote
+  backend: kv_default
 ```


@@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
 |-------|------|----------|---------|-------------|
 | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
 | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 weaviate_api_key: null
 weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db
+persistence:
+  namespace: vector_io::weaviate
+  backend: kv_default
 ```

@ -30,3 +30,5 @@ fi
stack_dir=$(dirname $(dirname $THIS_DIR)) stack_dir=$(dirname $(dirname $THIS_DIR))
PYTHONPATH=$PYTHONPATH:$stack_dir \ PYTHONPATH=$PYTHONPATH:$stack_dir \
python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static
cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml

@ -9024,6 +9024,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9901,6 +9905,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -6734,6 +6734,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7403,6 +7407,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -7600,6 +7600,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -8148,6 +8152,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -5815,6 +5815,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -6218,6 +6222,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -9272,6 +9272,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9820,6 +9824,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,

@ -7028,6 +7028,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7431,6 +7435,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at

@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
:param tools: (Optional) An array of tools the model may call while generating a response. :param tools: (Optional) An array of tools the model may call while generating a response.
:param truncation: (Optional) Truncation strategy applied to the response :param truncation: (Optional) Truncation strategy applied to the response
:param usage: (Optional) Token usage information for the response :param usage: (Optional) Token usage information for the response
:param instructions: (Optional) System message inserted into the model's context
""" """
created_at: int created_at: int
@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
tools: list[OpenAIResponseTool] | None = None tools: list[OpenAIResponseTool] | None = None
truncation: str | None = None truncation: str | None = None
usage: OpenAIResponseUsage | None = None usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@json_schema_type @json_schema_type

@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
models = "models" models = "models"
shields = "shields" shields = "shields"
vector_dbs = "vector_dbs" # only used for routing
datasets = "datasets" datasets = "datasets"
scoring_functions = "scoring_functions" scoring_functions = "scoring_functions"
benchmarks = "benchmarks" benchmarks = "benchmarks"

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from typing import Literal from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
@ -59,3 +59,35 @@ class ListVectorDBsResponse(BaseModel):
""" """
data: list[VectorDB] data: list[VectorDB]
@runtime_checkable
class VectorDBs(Protocol):
"""Internal protocol for vector_dbs routing - no public API endpoints."""
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""Internal method to list vector databases."""
...
async def get_vector_db(
self,
vector_db_id: str,
) -> VectorDB:
"""Internal method to get a vector database by ID."""
...
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> VectorDB:
"""Internal method to register a vector database."""
...
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Internal method to unregister a vector database."""
...
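Since the protocol is `@runtime_checkable`, implementations satisfy it structurally; no inheritance is required. A minimal self-contained sketch of that mechanism (the `InMemoryVectorDBs` class is hypothetical, and `isinstance` against a runtime-checkable protocol checks only method presence, not signatures):

```python
from typing import Protocol, runtime_checkable


@runtime_checkable
class HasVectorDBRouting(Protocol):
    async def list_vector_dbs(self): ...
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...


class InMemoryVectorDBs:  # hypothetical; note: no inheritance from the protocol
    async def list_vector_dbs(self):
        return []

    async def unregister_vector_db(self, vector_db_id: str) -> None:
        pass


# Structural check succeeds because the expected methods exist.
assert isinstance(InMemoryVectorDBs(), HasVectorDBRouting)
```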

@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars from llama_stack.core.stack import replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@ -286,21 +294,42 @@ def _generate_run_config(
Generate a run.yaml template file for user to edit from a build.yaml file Generate a run.yaml template file for user to edit from a build.yaml file
""" """
apis = list(build_config.distribution_spec.providers.keys()) apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
),
)
run_config = StackRunConfig( run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name, image_name=image_name,
apis=apis, apis=apis,
providers={}, providers={},
storage=storage,
external_providers_dir=build_config.external_providers_dir external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR, else EXTERNAL_PROVIDERS_DIR,
) )
if not run_config.inference_store:
run_config.inference_store = SqliteSqlStoreConfig(
**SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
)
)
# build providers dict # build providers dict
provider_registry = get_provider_registry(build_config) provider_registry = get_provider_registry(build_config)
for api in apis: for api in apis:

@ -17,10 +17,19 @@ from llama_stack.core.datatypes import (
BuildConfig, BuildConfig,
Provider, Provider,
StackRunConfig, StackRunConfig,
StorageConfig,
) )
from llama_stack.core.distribution import get_provider_registry from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
@ -51,11 +60,23 @@ def generate_run_config(
Generate a run.yaml template file for user to edit from a build.yaml file Generate a run.yaml template file for user to edit from a build.yaml file
""" """
apis = list(build_config.distribution_spec.providers.keys()) apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
run_config = StackRunConfig( run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name, image_name=image_name,
apis=apis, apis=apis,
providers={}, providers={},
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
external_providers_dir=build_config.external_providers_dir external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR, else EXTERNAL_PROVIDERS_DIR,

@ -159,6 +159,37 @@ def upgrade_from_routing_table(
config_dict["apis"] = config_dict["apis_to_serve"] config_dict["apis"] = config_dict["apis_to_serve"]
config_dict.pop("apis_to_serve", None) config_dict.pop("apis_to_serve", None)
# Add default storage config if not present
if "storage" not in config_dict:
config_dict["storage"] = {
"backends": {
"kv_default": {
"type": "kv_sqlite",
"db_path": "~/.llama/kvstore.db",
},
"sql_default": {
"type": "sql_sqlite",
"db_path": "~/.llama/sql_store.db",
},
},
"stores": {
"metadata": {
"namespace": "registry",
"backend": "kv_default",
},
"inference": {
"table_name": "inference_store",
"backend": "sql_default",
"max_write_queue_size": 10000,
"num_writers": 4,
},
"conversations": {
"table_name": "openai_conversations",
"backend": "sql_default",
},
},
}
return config_dict return config_dict

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import os
import secrets import secrets
import time import time
from typing import Any from typing import Any
@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
Conversations, Conversations,
Metadata, Metadata,
) )
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import ( from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
SqliteSqlStoreConfig,
SqlStoreConfig,
sqlstore_impl,
)
logger = get_logger(name=__name__, category="openai_conversations") logger = get_logger(name=__name__, category="openai_conversations")
@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel): class ConversationServiceConfig(BaseModel):
"""Configuration for the built-in conversation service. """Configuration for the built-in conversation service.
:param conversations_store: SQL store configuration for conversations (defaults to SQLite) :param run_config: Stack run configuration for resolving persistence
:param policy: Access control rules :param policy: Access control rules
""" """
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig( run_config: StackRunConfig
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
)
policy: list[AccessRule] = [] policy: list[AccessRule] = []
@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
self.deps = deps self.deps = deps
self.policy = config.policy self.policy = config.policy
base_sql_store = sqlstore_impl(config.conversations_store) # Use conversations store reference from run config
conversations_ref = config.run_config.storage.stores.conversations
if not conversations_ref:
raise ValueError("storage.stores.conversations must be configured in run config")
base_sql_store = sqlstore_impl(conversations_ref)
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
async def initialize(self) -> None: async def initialize(self) -> None:
"""Initialize the store and create tables.""" """Initialize the store and create tables."""
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
await self.sql_store.create_table( await self.sql_store.create_table(
"openai_conversations", "openai_conversations",
{ {

@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
StorageBackendType,
StorageConfig,
)
from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@ -351,12 +354,32 @@ class AuthenticationRequiredError(Exception):
pass pass
class QualifiedModel(BaseModel):
"""A qualified model identifier, consisting of a provider ID and a model ID."""
provider_id: str
model_id: str
class VectorStoresConfig(BaseModel):
"""Configuration for vector stores in the stack."""
default_provider_id: str | None = Field(
default=None,
description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
)
default_embedding_model: QualifiedModel | None = Field(
default=None,
description="Default embedding model configuration for vector stores.",
)
class QuotaPeriod(StrEnum): class QuotaPeriod(StrEnum):
DAY = "day" DAY = "day"
class QuotaConfig(BaseModel): class QuotaConfig(BaseModel):
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
authenticated_max_requests: int = Field( authenticated_max_requests: int = Field(
default=1000, description="Max requests for authenticated clients per period" default=1000, description="Max requests for authenticated clients per period"
@ -438,18 +461,6 @@ class ServerConfig(BaseModel):
) )
class InferenceStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class ResponsesStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class StackRunConfig(BaseModel): class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@ -476,26 +487,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
can be instantiated multiple times (with different configs) if necessary. can be instantiated multiple times (with different configs) if necessary.
""", """,
) )
metadata_store: KVStoreConfig | None = Field( storage: StorageConfig = Field(
default=None, description="Catalog of named storage backends and references available to the stack",
description="""
Configuration for the persistence store used by the distribution registry. If not specified,
a default SQLite store will be used.""",
)
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the inference API. Can be either a
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
If not specified, a default SQLite store will be used.""",
)
conversations_store: SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the conversations API.
If not specified, a default SQLite store will be used.""",
) )
# registry of "resources" in the distribution # registry of "resources" in the distribution
@ -526,6 +519,11 @@ If not specified, a default SQLite store will be used.""",
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
) )
vector_stores: VectorStoresConfig | None = Field(
default=None,
description="Configuration for vector stores, including default embedding model",
)
@field_validator("external_providers_dir") @field_validator("external_providers_dir")
@classmethod @classmethod
def validate_external_providers_dir(cls, v): def validate_external_providers_dir(cls, v):
@ -535,6 +533,49 @@ If not specified, a default SQLite store will be used.""",
return Path(v) return Path(v)
return v return v
@model_validator(mode="after")
def validate_server_stores(self) -> "StackRunConfig":
backend_map = self.storage.backends
stores = self.storage.stores
kv_backends = {
name
for name, cfg in backend_map.items()
if cfg.type
in {
StorageBackendType.KV_REDIS,
StorageBackendType.KV_SQLITE,
StorageBackendType.KV_POSTGRES,
StorageBackendType.KV_MONGODB,
}
}
sql_backends = {
name
for name, cfg in backend_map.items()
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
}
def _ensure_backend(reference, expected_set, store_name: str) -> None:
if reference is None:
return
backend_name = reference.backend
if backend_name not in backend_map:
raise ValueError(
f"{store_name} references unknown backend '{backend_name}'. "
f"Available backends: {sorted(backend_map)}"
)
if backend_name not in expected_set:
raise ValueError(
f"{store_name} references backend '{backend_name}' of type "
f"'{backend_map[backend_name].type.value}', but a backend of type "
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
)
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
return self
class BuildConfig(BaseModel): class BuildConfig(BaseModel):
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
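A quick way to see the `validate_server_stores` validator above in action: point `storage.stores.metadata` at a SQL backend and construction fails. A minimal sketch, assuming the import paths used elsewhere in this diff and that the other store references on `ServerStoresConfig` are optional:

```python
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteSqlStoreConfig,
    StorageConfig,
)

try:
    StackRunConfig(
        image_name="demo",
        apis=[],
        providers={},
        storage=StorageConfig(
            backends={"sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql_store.db")},
            stores=ServerStoresConfig(
                # metadata requires a kv_* backend, so this reference is rejected
                metadata=KVStoreReference(backend="sql_default", namespace="registry"),
            ),
        ),
    )
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print(err)
```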

@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
routing_table_api=Api.tool_groups, routing_table_api=Api.tool_groups,
router_api=Api.tool_runtime, router_api=Api.tool_runtime,
), ),
AutoRoutedApiInfo(
routing_table_api=Api.vector_dbs,
router_api=Api.vector_io,
),
] ]

@ -11,9 +11,8 @@ from pydantic import BaseModel
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
class PromptServiceConfig(BaseModel): class PromptServiceConfig(BaseModel):
@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
self.kvstore: KVStore self.kvstore: KVStore
async def initialize(self) -> None: async def initialize(self) -> None:
kvstore_config = SqliteKVStoreConfig( # Use metadata store backend with prompts-specific namespace
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() metadata_ref = self.config.run_config.storage.stores.metadata
) if not metadata_ref:
self.kvstore = await kvstore_impl(kvstore_config) raise ValueError("storage.stores.metadata must be configured in run config")
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
self.kvstore = await kvstore_impl(prompts_ref)
def _get_default_key(self, prompt_id: str) -> str: def _get_default_key(self, prompt_id: str) -> str:
"""Get the KVStore key that stores the default version number.""" """Get the KVStore key that stores the default version number."""

@ -29,6 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl from llama_stack.core.client import get_client_impl
@ -81,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.inspect: Inspect, Api.inspect: Inspect,
Api.batches: Batches, Api.batches: Batches,
Api.vector_io: VectorIO, Api.vector_io: VectorIO,
Api.vector_dbs: VectorDBs,
Api.models: Models, Api.models: Models,
Api.safety: Safety, Api.safety: Safety,
Api.shields: Shields, Api.shields: Shields,

@ -6,7 +6,10 @@
from typing import Any from typing import Any
from llama_stack.core.datatypes import AccessRule, RoutedProtocol from llama_stack.core.datatypes import (
AccessRule,
RoutedProtocol,
)
from llama_stack.core.stack import StackRunConfig from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.datatypes import Api, RoutingTable
@ -26,6 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_dbs import VectorDBsRoutingTable
api_to_tables = { api_to_tables = {
"models": ModelsRoutingTable, "models": ModelsRoutingTable,
@ -34,6 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable, "scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable, "benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable, "tool_groups": ToolGroupsRoutingTable,
"vector_dbs": VectorDBsRoutingTable,
} }
if api.value not in api_to_tables: if api.value not in api_to_tables:
@ -76,14 +81,21 @@ async def get_auto_router_impl(
api_to_dep_impl[dep_name] = deps[dep_api] api_to_dep_impl[dep_name] = deps[dep_api]
# TODO: move pass configs to routers instead # TODO: move pass configs to routers instead
if api == Api.inference and run_config.inference_store: if api == Api.inference:
inference_ref = run_config.storage.stores.inference
if not inference_ref:
raise ValueError("storage.stores.inference must be configured in run config")
inference_store = InferenceStore( inference_store = InferenceStore(
config=run_config.inference_store, reference=inference_ref,
policy=policy, policy=policy,
) )
await inference_store.initialize() await inference_store.initialize()
api_to_dep_impl["store"] = inference_store api_to_dep_impl["store"] = inference_store
elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
await impl.initialize() await impl.initialize()
return impl return impl

@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject, VectorStoreObject,
VectorStoreSearchResponsePage, VectorStoreSearchResponsePage,
) )
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -43,9 +44,11 @@ class VectorIORouter(VectorIO):
def __init__( def __init__(
self, self,
routing_table: RoutingTable, routing_table: RoutingTable,
vector_stores_config: VectorStoresConfig | None = None,
) -> None: ) -> None:
logger.debug("Initializing VectorIORouter") logger.debug("Initializing VectorIORouter")
self.routing_table = routing_table self.routing_table = routing_table
self.vector_stores_config = vector_stores_config
async def initialize(self) -> None: async def initialize(self) -> None:
logger.debug("VectorIORouter.initialize") logger.debug("VectorIORouter.initialize")
@ -122,6 +125,17 @@ class VectorIORouter(VectorIO):
embedding_dimension = extra.get("embedding_dimension") embedding_dimension = extra.get("embedding_dimension")
provider_id = extra.get("provider_id") provider_id = extra.get("provider_id")
# Use default embedding model if not specified
if (
embedding_model is None
and self.vector_stores_config
and self.vector_stores_config.default_embedding_model is not None
):
# Construct the full model ID with provider prefix
embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
model_id = self.vector_stores_config.default_embedding_model.model_id
embedding_model = f"{embedding_provider_id}/{model_id}"
if embedding_model is not None and embedding_dimension is None: if embedding_model is not None and embedding_dimension is None:
embedding_dimension = await self._get_embedding_model_dimension(embedding_model) embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
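From the caller's side, the effect of this block is that `embedding_model` (and `provider_id`, handled below) no longer have to ride along on every create request once `vector_stores` defaults are configured. A sketch of both request shapes (the OpenAI-client call and the `faiss` provider id are illustrative assumptions, not taken from this diff):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Explicit per-request selection; the keys mirror the extra.get(...) reads above.
store = client.vector_stores.create(
    name="docs",
    extra_body={
        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
        "provider_id": "faiss",  # assumed provider id
    },
)

# With vector_stores.default_provider_id and default_embedding_model set in the
# run config, the same call can omit extra_body entirely.
store = client.vector_stores.create(name="docs")
```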
@ -132,11 +146,24 @@ class VectorIORouter(VectorIO):
raise ValueError("No vector_io providers available") raise ValueError("No vector_io providers available")
if num_providers > 1: if num_providers > 1:
available_providers = list(self.routing_table.impls_by_provider_id.keys()) available_providers = list(self.routing_table.impls_by_provider_id.keys())
raise ValueError( # Use default configured provider
f"Multiple vector_io providers available. Please specify provider_id in extra_body. " if self.vector_stores_config and self.vector_stores_config.default_provider_id:
f"Available providers: {available_providers}" default_provider = self.vector_stores_config.default_provider_id
) if default_provider in available_providers:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] provider_id = default_provider
logger.debug(f"Using configured default vector store provider: {provider_id}")
else:
raise ValueError(
f"Configured default vector store provider '{default_provider}' not found. "
f"Available providers: {available_providers}"
)
else:
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
vector_db_id = f"vs_{uuid.uuid4()}" vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db( registered_vector_db = await self.routing_table.register_vector_db(
@ -243,8 +270,7 @@ class VectorIORouter(VectorIO):
vector_store_id: str, vector_store_id: str,
) -> VectorStoreDeleteResponse: ) -> VectorStoreDeleteResponse:
logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id) return await self.routing_table.openai_delete_vector_store(vector_store_id)
return await provider.openai_delete_vector_store(vector_store_id)
async def openai_search_vector_store( async def openai_search_vector_store(
self, self,

@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable from .toolgroups import ToolGroupsRoutingTable
from .vector_dbs import VectorDBsRoutingTable
def apiname_object(): def apiname_object():
if isinstance(self, ModelsRoutingTable): if isinstance(self, ModelsRoutingTable):
return ("Inference", "model") return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable): elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield") return ("Safety", "shield")
elif isinstance(self, VectorDBsRoutingTable):
return ("VectorIO", "vector_db")
elif isinstance(self, DatasetsRoutingTable): elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset") return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable): elif isinstance(self, ScoringFunctionsRoutingTable):

@ -0,0 +1,323 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
# The VectorDBs protocol is intentionally not imported here, to avoid exposing its public API endpoints
from llama_stack.apis.vector_io.vector_io import (
OpenAICreateVectorStoreRequestWithExtraBody,
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorDBWithOwner,
)
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl, lookup_model
logger = get_logger(name=__name__, category="core::routing_tables")
class VectorDBsRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_db operations.
Does not inherit from VectorDBs to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""
# Internal methods only - no public API exposure
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_db_id: str | None = None,
vector_db_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
provider_id = list(self.impls_by_provider_id.keys())[0]
if len(self.impls_by_provider_id) > 1:
logger.warning(
f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
)
else:
raise ValueError("No provider available. Please configure a vector_io provider.")
model = await lookup_model(self, embedding_model)
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
try:
provider = self.impls_by_provider_id[provider_id]
except KeyError:
available_providers = list(self.impls_by_provider_id.keys())
raise ValueError(
f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
) from None
logger.warning(
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
)
request = OpenAICreateVectorStoreRequestWithExtraBody(
name=vector_db_name or vector_db_id,
embedding_model=embedding_model,
embedding_dimension=model.metadata["embedding_dimension"],
provider_id=provider_id,
provider_vector_db_id=provider_vector_db_id,
)
vector_store = await provider.openai_create_vector_store(request)
vector_store_id = vector_store.id
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
logger.warning(
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
)
vector_db_data = {
"identifier": vector_store_id,
"type": ResourceType.vector_db.value,
"provider_id": provider_id,
"provider_resource_id": actual_provider_vector_db_id,
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
"vector_db_name": vector_store.name,
}
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)
async def openai_update_vector_store(
self,
vector_store_id: str,
name: str | None = None,
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
name=name,
expires_after=expires_after,
metadata=metadata,
)
async def openai_delete_vector_store(
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_db(vector_store_id)
return result
async def unregister_vector_db(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
if vector_db_obj:
await self.unregister_object(vector_db_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
async def openai_search_vector_store(
self,
vector_store_id: str,
query: str | list[str],
filters: dict[str, Any] | None = None,
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
query=query,
filters=filters,
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
search_mode=search_mode,
)
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
limit=limit,
order=order,
after=after,
before=before,
filter=filter,
)
async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_update_vector_store_file(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
)
async def openai_delete_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
file_ids: list[str],
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
file_ids=file_ids,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
async def openai_list_files_in_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
after: str | None = None,
before: str | None = None,
filter: str | None = None,
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
after=after,
before=before,
filter=filter,
limit=limit,
order=order,
)
async def openai_cancel_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)

@ -72,13 +72,30 @@ class AuthProvider(ABC):
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]: def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {} attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items(): for claim_key, attribute_key in mapping.items():
if claim_key not in claims: # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
# Then fall back to literal key with dots (e.g., "my.dotted.key")
claim: object = claims
keys = claim_key.split(".")
for key in keys:
if isinstance(claim, dict) and key in claim:
claim = claim[key]
else:
claim = None
break
if claim is None and claim_key in claims:
# Fall back to checking if claim_key exists as a literal key
claim = claims[claim_key]
if claim is None:
continue continue
claim = claims[claim_key]
if isinstance(claim, list): if isinstance(claim, list):
values = claim values = claim
else: elif isinstance(claim, str):
values = claim.split() values = claim.split()
else:
continue
if attribute_key in attributes: if attribute_key in attributes:
attributes[attribute_key].extend(values) attributes[attribute_key].extend(values)
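A worked example of the lookup order above: dot notation traverses nested claims first, and only if that fails is the dotted string tried as a literal key. A minimal sketch (claim values are made up; the import path is assumed):

```python
from llama_stack.core.server.auth_providers import (  # module path assumed
    get_attributes_from_claims,
)

claims = {
    "sub": "user-123",
    "resource_access": {"llamastack": {"roles": ["admin", "writer"]}},
    "my.dotted.key": "teamA teamB",
}
mapping = {
    "resource_access.llamastack.roles": "roles",  # nested traversal
    "my.dotted.key": "teams",  # literal dotted key, via the fallback
    "sub": "username",  # plain string claims are whitespace-split
}

print(get_attributes_from_claims(claims, mapping))
# {'roles': ['admin', 'writer'], 'teams': ['teamA', 'teamB'], 'username': ['user-123']}
```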

@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta
from starlette.types import ASGIApp, Receive, Scope, Send from starlette.types import ASGIApp, Receive, Scope, Send
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
logger = get_logger(name=__name__, category="core::server") logger = get_logger(name=__name__, category="core::server")
@ -33,7 +33,7 @@ class QuotaMiddleware:
def __init__( def __init__(
self, self,
app: ASGIApp, app: ASGIApp,
kv_config: KVStoreConfig, kv_config: KVStoreReference,
anonymous_max_requests: int, anonymous_max_requests: int,
authenticated_max_requests: int, authenticated_max_requests: int,
window_seconds: int = 86400, window_seconds: int = 86400,
@ -45,15 +45,15 @@ class QuotaMiddleware:
self.authenticated_max_requests = authenticated_max_requests self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds self.window_seconds = window_seconds
if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
async def _get_kv(self) -> KVStore: async def _get_kv(self) -> KVStore:
if self.kv is None: if self.kv is None:
self.kv = await kvstore_impl(self.kv_config) self.kv = await kvstore_impl(self.kv_config)
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
return self.kv return self.kv
async def __call__(self, scope: Scope, receive: Receive, send: Send): async def __call__(self, scope: Scope, receive: Receive, send: Send):
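Wiring-wise, the middleware now receives a `KVStoreReference` into one of the registered backends instead of an inline `KVStoreConfig`, and the SQLite TTL warning fires lazily when the store is first resolved. A minimal construction sketch (the `QuotaMiddleware` import path and the `quota` namespace are assumptions):

```python
from llama_stack.core.server.quota import QuotaMiddleware  # path assumed
from llama_stack.core.storage.datatypes import KVStoreReference


async def app(scope, receive, send):  # placeholder ASGI app
    ...


quota_app = QuotaMiddleware(
    app,
    kv_config=KVStoreReference(backend="kv_default", namespace="quota"),  # namespace assumed
    anonymous_max_requests=100,
    authenticated_max_requests=1000,
    window_seconds=86400,
)
```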

@ -35,13 +35,23 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageBackendConfig,
StorageConfig,
)
from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -98,30 +108,6 @@ REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None TEST_RECORDING_CONTEXT = None
async def validate_default_embedding_model(impls: dict[Api, Any]):
"""Validate that at most one embedding model is marked as default."""
if Api.models not in impls:
return
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
default_embedding_models = []
for model in models_list:
if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
default_embedding_models.append(model.identifier)
if len(default_embedding_models) > 1:
raise ValueError(
f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
"Only one embedding model can be marked as default."
)
if default_embedding_models:
logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
for rsrc, api, register_method, list_method in RESOURCES: for rsrc, api, register_method, list_method in RESOURCES:
objects = getattr(run_config, rsrc) objects = getattr(run_config, rsrc)
@ -152,7 +138,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
) )
await validate_default_embedding_model(impls)
async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
"""Validate vector stores configuration."""
if vector_stores_config is None:
return
default_embedding_model = vector_stores_config.default_embedding_model
if default_embedding_model is None:
return
provider_id = default_embedding_model.provider_id
model_id = default_embedding_model.model_id
default_model_id = f"{provider_id}/{model_id}"
if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
default_model = models_list.get(default_model_id)
if default_model is None:
raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
class EnvVarError(Exception): class EnvVarError(Exception):
@ -329,6 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
impls[Api.conversations] = conversations_impl impls[Api.conversations] = conversations_impl
def _initialize_storage(run_config: StackRunConfig):
kv_backends: dict[str, StorageBackendConfig] = {}
sql_backends: dict[str, StorageBackendConfig] = {}
for backend_name, backend_config in run_config.storage.backends.items():
        backend_type = backend_config.type.value
        if backend_type.startswith("kv_"):
            kv_backends[backend_name] = backend_config
        elif backend_type.startswith("sql_"):
            sql_backends[backend_name] = backend_config
        else:
            raise ValueError(f"Unknown storage backend type: {backend_type}")
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
register_kvstore_backends(kv_backends)
register_sqlstore_backends(sql_backends)
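A minimal sketch of the kv_/sql_ split above, using the backend configs this commit adds (the import path is assumed from the registry change further down):

from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig

backends = {
    "kv_default": SqliteKVStoreConfig(db_path="/tmp/demo/kvstore.db"),
    "sql_default": SqliteSqlStoreConfig(db_path="/tmp/demo/sql_store.db"),
}
# Mirrors the prefix dispatch in _initialize_storage.
kv = {name: cfg for name, cfg in backends.items() if cfg.type.value.startswith("kv_")}
sql = {name: cfg for name, cfg in backends.items() if cfg.type.value.startswith("sql_")}
assert set(kv) == {"kv_default"} and set(sql) == {"sql_default"}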
class Stack: class Stack:
def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
self.run_config = run_config self.run_config = run_config
@ -347,7 +386,11 @@ class Stack:
TEST_RECORDING_CONTEXT.__enter__() TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) _initialize_storage(self.run_config)
stores = self.run_config.storage.stores
if not stores.metadata:
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
internal_impls = {} internal_impls = {}
@ -367,8 +410,8 @@ class Stack:
await impls[Api.conversations].initialize() await impls[Api.conversations].initialize()
await register_resources(self.run_config, impls) await register_resources(self.run_config, impls)
await refresh_registry_once(impls) await refresh_registry_once(impls)
await validate_vector_stores_config(self.run_config.vector_stores, impls)
self.impls = impls self.impls = impls
def create_registry_refresh_task(self): def create_registry_refresh_task(self):
@ -488,5 +531,16 @@ def run_config_from_adhoc_config_spec(
image_name="distro-test", image_name="distro-test",
apis=list(provider_configs_by_api.keys()), apis=list(provider_configs_by_api.keys()),
providers=provider_configs_by_api, providers=provider_configs_by_api,
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
"sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
) )
return config return config

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal
from pydantic import BaseModel, Field, field_validator
class StorageBackendType(StrEnum):
KV_REDIS = "kv_redis"
KV_SQLITE = "kv_sqlite"
KV_POSTGRES = "kv_postgres"
KV_MONGODB = "kv_mongodb"
SQL_SQLITE = "sql_sqlite"
SQL_POSTGRES = "sql_postgres"
class CommonConfig(BaseModel):
namespace: str | None = Field(
default=None,
description="All keys will be prefixed with this namespace",
)
class RedisKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
host: str = "localhost"
port: int = 6379
@property
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": StorageBackendType.KV_REDIS.value,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
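A quick sanity check of the url property, as a sketch with a hypothetical host and port:

cfg = RedisKVStoreConfig(host="cache.internal", port=6380)
assert cfg.url == "redis://cache.internal:6380"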
class SqliteKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
db_path: str = Field(
description="File path for the sqlite database",
)
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": StorageBackendType.KV_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
class PostgresKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
ssl_mode: str | None = None
ca_cert_path: str | None = None
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": StorageBackendType.KV_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
}
    @field_validator("table_name")
    @classmethod
def validate_table_name(cls, v: str) -> str:
# PostgreSQL identifiers rules:
# - Must start with a letter or underscore
# - Can contain letters, numbers, and underscores
# - Maximum length is 63 bytes
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
if not re.match(pattern, v):
raise ValueError(
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
)
if len(v) > 63:
raise ValueError("Table name must be less than 63 characters")
return v
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
class MongoDBKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": StorageBackendType.KV_MONGODB.value,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
}
class SqlAlchemySqlStoreConfig(BaseModel):
@property
@abstractmethod
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
db_path: str = Field(
description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
)
@property
def engine_str(self) -> str:
return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return {
"type": StorageBackendType.SQL_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
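A sketch of the resulting engine string; expanduser() resolves the path to an absolute one, so the sqlite URL carries four slashes:

cfg = SqliteSqlStoreConfig(db_path="/tmp/demo/sql_store.db")
assert cfg.engine_str == "sqlite+aiosqlite:////tmp/demo/sql_store.db"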
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
@property
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return {
"type": StorageBackendType.SQL_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
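And the asyncpg counterpart, built from the field defaults declared above:

cfg = PostgresSqlStoreConfig(user="llamastack", password="llamastack")
assert cfg.engine_str == "postgresql+asyncpg://llamastack:llamastack@localhost:5432/llamastack"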
# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
"""A reference to a 'SQL-like' persistent store. A table name must be provided."""
table_name: str = Field(
description="Name of the table to use for the SqlStore",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
"""A reference to a 'key-value' persistent store. A namespace must be provided."""
namespace: str = Field(
description="Key prefix for KVStore backends",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
StorageBackendConfig = Annotated[
RedisKVStoreConfig
| SqliteKVStoreConfig
| PostgresKVStoreConfig
| MongoDBKVStoreConfig
| SqliteSqlStoreConfig
| PostgresSqlStoreConfig,
Field(discriminator="type"),
]
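Because the union is discriminated on type, a plain dict loaded from YAML resolves to the right model — a sketch using pydantic's TypeAdapter:

from pydantic import TypeAdapter

adapter = TypeAdapter(StorageBackendConfig)
backend = adapter.validate_python({"type": "kv_sqlite", "db_path": "/tmp/demo/kvstore.db"})
assert isinstance(backend, SqliteKVStoreConfig)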
class InferenceStoreReference(SqlStoreReference):
"""Inference store configuration with queue tuning."""
max_write_queue_size: int = Field(
default=10000,
description="Max queued writes for inference store",
)
num_writers: int = Field(
default=4,
description="Number of concurrent background writers",
)
class ResponsesStoreReference(InferenceStoreReference):
"""Responses store configuration with queue tuning."""
class ServerStoresConfig(BaseModel):
metadata: KVStoreReference | None = Field(
default=None,
description="Metadata store configuration (uses KV backend)",
)
inference: InferenceStoreReference | None = Field(
default=None,
description="Inference store configuration (uses SQL backend)",
)
conversations: SqlStoreReference | None = Field(
default=None,
description="Conversations store configuration (uses SQL backend)",
)
responses: ResponsesStoreReference | None = Field(
default=None,
description="Responses store configuration (uses SQL backend)",
)
class StorageConfig(BaseModel):
    backends: dict[str, StorageBackendConfig] = Field(
        description="Named backend configurations (e.g., 'kv_default', 'sql_default')",
    )
    stores: ServerStoresConfig = Field(
        default_factory=ServerStoresConfig,
        description="Named references to storage backends used by the stack core",
    )
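End to end, a storage: mapping shaped like the regenerated run.yaml files below validates into these models — a sketch with a hypothetical sqlite layout:

raw = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "/tmp/demo/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "/tmp/demo/sql_store.db"},
    },
    "stores": {
        "metadata": {"backend": "kv_default", "namespace": "registry"},
        "inference": {"backend": "sql_default", "table_name": "inference_store"},
        "conversations": {"backend": "sql_default", "table_name": "openai_conversations"},
    },
}
storage = StorageConfig.model_validate(raw)
assert storage.stores.inference.max_write_queue_size == 10000  # queue-tuning default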

@ -11,10 +11,9 @@ from typing import Protocol
import pydantic import pydantic
from llama_stack.core.datatypes import RoutableObjectWithProvider from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core::registry") logger = get_logger(__name__, category="core::registry")
@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
async def create_dist_registry( async def create_dist_registry(
metadata_store: KVStoreConfig | None, metadata_store: KVStoreReference, image_name: str
image_name: str,
) -> tuple[CachedDiskDistributionRegistry, KVStore]: ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
# instantiate kvstore for storing and retrieving distribution metadata # instantiate kvstore for storing and retrieving distribution metadata
if metadata_store: dist_kvstore = await kvstore_impl(metadata_store)
dist_kvstore = await kvstore_impl(metadata_store)
else:
dist_kvstore = await kvstore_impl(
SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
)
dist_registry = CachedDiskDistributionRegistry(dist_kvstore) dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
await dist_registry.initialize() await dist_registry.initialize()
return dist_registry, dist_kvstore return dist_registry, dist_kvstore
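A usage sketch, assuming _initialize_storage has already registered a backend named kv_default:

import asyncio

from llama_stack.core.storage.datatypes import KVStoreReference

async def main() -> None:
    ref = KVStoreReference(backend="kv_default", namespace="registry")
    dist_registry, dist_kvstore = await create_dist_registry(ref, image_name="starter")
    # dist_registry is initialized and backed by the named KV backend;
    # dist_kvstore is the raw handle for callers that need direct access.

asyncio.run(main())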

@ -25,6 +25,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@ -93,30 +93,30 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db backend: kv_default
- provider_id: sqlite-vec - provider_id: sqlite-vec
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus} - provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus provider_type: inline::milvus
config: config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
kvstore: persistence:
type: sqlite namespace: vector_io::milvus
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb} - provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files: files:
- provider_id: meta-reference-files - provider_id: meta-reference-files
provider_type: inline::localfs provider_type: inline::localfs
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db backend: sql_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training: post_training:
- provider_id: torchtune-cpu - provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu provider_type: inline::torchtune-cpu
@ -163,21 +181,21 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -207,17 +225,28 @@ providers:
provider_type: inline::reference provider_type: inline::reference
config: config:
kvstore: kvstore:
type: sqlite namespace: batches
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db backend: kv_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: shields:
- shield_id: llama-guard - shield_id: llama-guard
@ -239,3 +268,8 @@ server:
port: 8321 port: 8321
telemetry: telemetry:
enabled: true enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
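Each provider-level persistence block in this file carries the same two keys as a KVStoreReference; a sketch validating one of them, assuming the provider configs reuse that model:

from llama_stack.core.storage.datatypes import KVStoreReference

ref = KVStoreReference.model_validate({"namespace": "vector_io::faiss", "backend": "kv_default"})
assert ref.backend == "kv_default" and ref.namespace == "vector_io::faiss"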

@ -26,9 +26,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -38,32 +38,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -86,15 +89,26 @@ providers:
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -34,32 +34,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -82,15 +85,26 @@ providers:
max_results: 3 max_results: 3
- provider_id: rag-runtime - provider_id: rag-runtime
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -37,9 +37,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -49,32 +49,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -99,15 +102,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -27,9 +27,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -39,32 +39,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -89,15 +92,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -28,9 +28,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -41,12 +41,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -65,8 +68,8 @@ providers:
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db backend: kv_default
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
@ -86,17 +89,28 @@ providers:
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db backend: sql_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -23,9 +23,9 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db backend: kv_default
safety: safety:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -36,12 +36,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
@ -75,17 +78,28 @@ providers:
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db backend: sql_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: [] shields: []
vector_dbs: [] vector_dbs: []

@ -39,16 +39,16 @@ providers:
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -57,9 +57,9 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -69,32 +69,35 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval: eval:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -119,15 +122,26 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: gpt-4o model_id: gpt-4o

@ -91,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 768, "embedding_dimension": 768,
}, },
) )
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate( return DistributionTemplate(
name=name, name=name,
distro_type="self_hosted", distro_type="self_hosted",
@ -105,22 +104,16 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={ provider_overrides={
"inference": inference_providers + [embedding_provider], "inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers, "vector_io": vector_io_providers,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
}, },
default_models=default_models + [embedding_model], default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.sample_run_config(), storage_backends={
inference_store=postgres_config, "kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
), ),
}, },
run_config_env_vars={ run_config_env_vars={

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db backend: kv_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -34,20 +34,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: postgres agent_state:
host: ${env.POSTGRES_HOST:=localhost} namespace: agents
port: ${env.POSTGRES_PORT:=5432} backend: kv_default
db: ${env.POSTGRES_DB:=llamastack} responses:
user: ${env.POSTGRES_USER:=llamastack} table_name: responses
password: ${env.POSTGRES_PASSWORD:=llamastack} backend: sql_default
responses_store: max_write_queue_size: 10000
type: postgres num_writers: 4
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
tool_runtime: tool_runtime:
- provider_id: brave-search - provider_id: brave-search
provider_type: remote::brave-search provider_type: remote::brave-search
@ -63,24 +58,35 @@ providers:
provider_type: inline::rag-runtime provider_type: inline::rag-runtime
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
metadata_store: storage:
type: postgres backends:
host: ${env.POSTGRES_HOST:=localhost} kv_default:
port: ${env.POSTGRES_PORT:=5432} type: kv_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} user: ${env.POSTGRES_USER:=llamastack}
inference_store: password: ${env.POSTGRES_PASSWORD:=llamastack}
type: postgres table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
host: ${env.POSTGRES_HOST:=localhost} sql_default:
port: ${env.POSTGRES_PORT:=5432} type: sql_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
conversations_store: user: ${env.POSTGRES_USER:=llamastack}
type: sqlite password: ${env.POSTGRES_PASSWORD:=llamastack}
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@ -93,30 +93,30 @@ providers:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db backend: kv_default
- provider_id: sqlite-vec - provider_id: sqlite-vec
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
config: config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus} - provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus provider_type: inline::milvus
config: config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
kvstore: persistence:
type: sqlite namespace: vector_io::milvus
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb} - provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
config: config:
url: ${env.CHROMADB_URL:=} url: ${env.CHROMADB_URL:=}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector provider_type: remote::pgvector
config: config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=} db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=} user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=} password: ${env.PGVECTOR_PASSWORD:=}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files: files:
- provider_id: meta-reference-files - provider_id: meta-reference-files
provider_type: inline::localfs provider_type: inline::localfs
config: config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db backend: sql_default
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training: post_training:
- provider_id: huggingface-gpu - provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu provider_type: inline::huggingface-gpu
@ -166,21 +184,21 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db backend: kv_default
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
provider_type: remote::huggingface provider_type: remote::huggingface
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db backend: kv_default
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db backend: kv_default
scoring: scoring:
- provider_id: basic - provider_id: basic
provider_type: inline::basic provider_type: inline::basic
@ -210,17 +228,28 @@ providers:
provider_type: inline::reference provider_type: inline::reference
config: config:
kvstore: kvstore:
type: sqlite namespace: batches
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db backend: kv_default
metadata_store: storage:
type: sqlite backends:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db kv_default:
inference_store: type: kv_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db sql_default:
conversations_store: type: sql_sqlite
type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
models: [] models: []
shields: shields:
- shield_id: llama-guard - shield_id: llama-guard
@ -242,3 +271,8 @@ server:
port: 8321 port: 8321
telemetry: telemetry:
enabled: true enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus - provider_type: inline::milvus
- provider_type: remote::chromadb - provider_type: remote::chromadb
- provider_type: remote::pgvector - provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
safety: safety:

@@ -93,30 +93,30 @@ providers:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
       db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -125,17 +125,32 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -147,12 +162,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
    config:
-      persistence_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   post_training:
   - provider_id: torchtune-cpu
     provider_type: inline::torchtune-cpu
@@ -163,21 +181,21 @@ providers:
     provider_type: inline::meta-reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
+        namespace: eval
+        backend: kv_default
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
+        namespace: datasetio::huggingface
+        backend: kv_default
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
+        namespace: datasetio::localfs
+        backend: kv_default
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -207,17 +225,28 @@ providers:
     provider_type: inline::reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
-inference_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db
-conversations_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields:
 - shield_id: llama-guard
@@ -239,3 +268,8 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
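
Throughout this run.yaml, optional providers are toggled with shell-style substitution: `${env.VAR:=default}` expands to the variable or a default, while `${env.VAR:+value}` expands to `value` only when the variable is set, so `${env.QDRANT_URL:+qdrant}` yields an empty provider_id (disabling the provider) unless QDRANT_URL is defined. A rough standalone sketch of the two operators; llama-stack's actual resolver is not part of this diff:

```python
import os
import re

# Matches ${env.NAME:=default} and ${env.NAME:+value}
_PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

def expand(value: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, arg = match.group(1), match.group(2), match.group(3)
        current = os.environ.get(var)
        if op == "=":
            # ':=' falls back to the default when the variable is unset or empty
            return current if current else arg
        # ':+' yields the literal only when the variable is set
        return arg if current else ""
    return _PATTERN.sub(repl, value)

print(expand("${env.QDRANT_URL:+qdrant}"))  # "" while QDRANT_URL is unset
os.environ["QDRANT_URL"] = "http://localhost:6333"
print(expand("${env.QDRANT_URL:+qdrant}"))  # "qdrant"
```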

View file

@@ -11,8 +11,10 @@ from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
     ProviderSpec,
+    QualifiedModel,
     ShieldInput,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
@@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
+from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
+from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             BuildProvider(provider_type="inline::milvus"),
             BuildProvider(provider_type="remote::chromadb"),
             BuildProvider(provider_type="remote::pgvector"),
+            BuildProvider(provider_type="remote::qdrant"),
+            BuildProvider(provider_type="remote::weaviate"),
         ],
         "files": [BuildProvider(provider_type="inline::localfs")],
         "safety": [
@@ -221,12 +227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                         password="${env.PGVECTOR_PASSWORD:=}",
                     ),
                 ),
+                Provider(
+                    provider_id="${env.QDRANT_URL:+qdrant}",
+                    provider_type="remote::qdrant",
+                    config=QdrantVectorIOConfig.sample_run_config(
+                        f"~/.llama/distributions/{name}",
+                        url="${env.QDRANT_URL:=}",
+                    ),
+                ),
+                Provider(
+                    provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                    provider_type="remote::weaviate",
+                    config=WeaviateVectorIOConfig.sample_run_config(
+                        f"~/.llama/distributions/{name}",
+                        cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                    ),
+                ),
             ],
             "files": [files_provider],
         },
         default_models=[],
         default_tool_groups=default_tool_groups,
         default_shields=default_shields,
+        vector_stores_config=VectorStoresConfig(
+            default_provider_id="faiss",
+            default_embedding_model=QualifiedModel(
+                provider_id="sentence-transformers",
+                model_id="nomic-ai/nomic-embed-text-v1.5",
+            ),
+        ),
     ),
 },
 run_config_env_vars={

View file

@@ -27,8 +27,15 @@ from llama_stack.core.datatypes import (
     ShieldInput,
     TelemetryConfig,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    SqlStoreReference,
+    StorageBackendType,
+)
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
@@ -180,10 +187,10 @@ class RunConfigSettings(BaseModel):
     default_tool_groups: list[ToolGroupInput] | None = None
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
-    metadata_store: dict | None = None
-    inference_store: dict | None = None
-    conversations_store: dict | None = None
+    vector_stores_config: VectorStoresConfig | None = None
     telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
+    storage_backends: dict[str, Any] | None = None
+    storage_stores: dict[str, Any] | None = None

     def run_config(
         self,
@@ -226,28 +233,45 @@ class RunConfigSettings(BaseModel):
         # Get unique set of APIs from providers
         apis = sorted(providers.keys())

+        storage_backends = self.storage_backends or {
+            "kv_default": SqliteKVStoreConfig.sample_run_config(
+                __distro_dir__=f"~/.llama/distributions/{name}",
+                db_name="kvstore.db",
+            ),
+            "sql_default": SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=f"~/.llama/distributions/{name}",
+                db_name="sql_store.db",
+            ),
+        }
+        storage_stores = self.storage_stores or {
+            "metadata": KVStoreReference(
+                backend="kv_default",
+                namespace="registry",
+            ).model_dump(exclude_none=True),
+            "inference": InferenceStoreReference(
+                backend="sql_default",
+                table_name="inference_store",
+            ).model_dump(exclude_none=True),
+            "conversations": SqlStoreReference(
+                backend="sql_default",
+                table_name="openai_conversations",
+            ).model_dump(exclude_none=True),
+        }
+        storage_config = dict(
+            backends=storage_backends,
+            stores=storage_stores,
+        )
+
         # Return a dict that matches StackRunConfig structure
-        return {
+        config = {
             "version": LLAMA_STACK_RUN_CONFIG_VERSION,
             "image_name": name,
             "container_image": container_image,
             "apis": apis,
             "providers": provider_configs,
-            "metadata_store": self.metadata_store
-            or SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="registry.db",
-            ),
-            "inference_store": self.inference_store
-            or SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="inference_store.db",
-            ),
-            "conversations_store": self.conversations_store
-            or SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=f"~/.llama/distributions/{name}",
-                db_name="conversations.db",
-            ),
+            "storage": storage_config,
             "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
             "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
             "vector_dbs": [],
@@ -261,6 +285,11 @@ class RunConfigSettings(BaseModel):
             "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }

+        if self.vector_stores_config:
+            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
+
+        return config
+

 class DistributionTemplate(BaseModel):
     """
@@ -297,11 +326,15 @@ class DistributionTemplate(BaseModel):
         # We should have a better way to do this by formalizing the concept of "internal" APIs
         # and providers, with a way to specify dependencies for them.
-        if run_config_.get("inference_store"):
-            additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
-
-        if run_config_.get("metadata_store"):
-            additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
+        storage_cfg = run_config_.get("storage", {})
+        for backend_cfg in storage_cfg.get("backends", {}).values():
+            store_type = backend_cfg.get("type")
+            if not store_type:
+                continue
+            if str(store_type).startswith("kv_"):
+                additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
+            elif str(store_type).startswith("sql_"):
+                additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))

         if self.additional_pip_packages:
             additional_pip_packages.extend(self.additional_pip_packages)
@@ -387,11 +420,13 @@ class DistributionTemplate(BaseModel):
         def enum_representer(dumper, data):
             return dumper.represent_scalar("tag:yaml.org,2002:str", data.value)

-        # Register YAML representer for ModelType
+        # Register YAML representer for enums
         yaml.add_representer(ModelType, enum_representer)
         yaml.add_representer(DatasetPurpose, enum_representer)
+        yaml.add_representer(StorageBackendType, enum_representer)
         yaml.SafeDumper.add_representer(ModelType, enum_representer)
         yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
+        yaml.SafeDumper.add_representer(StorageBackendType, enum_representer)

         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)
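
The reworked dependency scan above derives extra pip packages from backend types rather than from the old per-store configs. A toy illustration of the same loop; the two `get_*_pip_packages` helpers are real functions referenced in the diff, but their bodies below are invented stand-ins for the example:

```python
# Stand-in package mappings; the real helpers live elsewhere in llama-stack.
def get_kv_pip_packages(cfg: dict) -> list[str]:
    return ["redis"] if cfg.get("type") == "kv_redis" else []

def get_sql_pip_packages(cfg: dict) -> list[str]:
    return ["asyncpg"] if cfg.get("type") == "sql_postgres" else []

backends = {
    "kv_default": {"type": "kv_sqlite"},
    "sql_default": {"type": "sql_postgres"},
}

additional_pip_packages: list[str] = []
for backend_cfg in backends.values():
    store_type = str(backend_cfg.get("type", ""))
    if store_type.startswith("kv_"):
        additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
    elif store_type.startswith("sql_"):
        additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))

print(additional_pip_packages)  # ['asyncpg']: only the Postgres backend adds a dep
```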

View file

@@ -22,9 +22,9 @@ providers:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -34,32 +34,35 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
+        namespace: eval
+        backend: kv_default
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
+        namespace: datasetio::huggingface
+        backend: kv_default
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
+        namespace: datasetio::localfs
+        backend: kv_default
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -90,17 +93,28 @@ providers:
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
-inference_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
-conversations_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields: []
 vector_dbs: []

View file

@@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents):
         self.policy = policy

     async def initialize(self) -> None:
-        self.persistence_store = await kvstore_impl(self.config.persistence_store)
-        self.responses_store = ResponsesStore(self.config.responses_store, self.policy)
+        self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
+        self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy)
         await self.responses_store.initialize()
         self.openai_responses_impl = OpenAIResponsesImpl(
             inference_api=self.inference_api,

View file

@@ -8,24 +8,30 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore import KVStoreConfig
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+
+
+class AgentPersistenceConfig(BaseModel):
+    """Nested persistence configuration for agents."""
+
+    agent_state: KVStoreReference
+    responses: ResponsesStoreReference


 class MetaReferenceAgentsImplConfig(BaseModel):
-    persistence_store: KVStoreConfig
-    responses_store: SqlStoreConfig
+    persistence: AgentPersistenceConfig

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "persistence_store": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="agents_store.db",
-            ),
-            "responses_store": SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="responses_store.db",
-            ),
+            "persistence": {
+                "agent_state": KVStoreReference(
+                    backend="kv_default",
+                    namespace="agents",
+                ).model_dump(exclude_none=True),
+                "responses": ResponsesStoreReference(
+                    backend="sql_default",
+                    table_name="responses",
+                ).model_dump(exclude_none=True),
+            }
         }
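
To make the nested shape concrete, here is a self-contained sketch that validates the same dict `sample_run_config` emits. The two reference models are stand-ins mirroring the fields used in this diff; the real classes live in `llama_stack.core.storage.datatypes`:

```python
from pydantic import BaseModel

# Stand-ins for the real reference types (fields mirror the diff above).
class KVStoreReference(BaseModel):
    backend: str
    namespace: str

class ResponsesStoreReference(BaseModel):
    backend: str
    table_name: str

class AgentPersistenceConfig(BaseModel):
    agent_state: KVStoreReference
    responses: ResponsesStoreReference

cfg = AgentPersistenceConfig.model_validate(
    {
        "agent_state": {"backend": "kv_default", "namespace": "agents"},
        "responses": {"backend": "sql_default", "table_name": "responses"},
    }
)
print(cfg.agent_state.namespace, cfg.responses.table_name)  # agents responses
```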

View file

@@ -359,6 +359,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )

         # Stream the response

View file

@@ -110,6 +110,7 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
+        instructions: str,
         safety_api,
         guardrail_ids: list[str] | None = None,
     ):
@@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:

View file

@@ -6,13 +6,13 @@

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference


 class ReferenceBatchesImplConfig(BaseModel):
     """Configuration for the Reference Batches implementation."""

-    kvstore: KVStoreConfig = Field(
+    kvstore: KVStoreReference = Field(
         description="Configuration for the key-value store backend.",
     )
@@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="batches.db",
-            ),
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="batches",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class LocalFSDatasetIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="localfs_datasetio.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="datasetio::localfs",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class MetaReferenceEvalConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="meta_reference_eval.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="eval",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -8,14 +8,14 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
+from llama_stack.core.storage.datatypes import SqlStoreReference


 class LocalfsFilesImplConfig(BaseModel):
     storage_dir: str = Field(
         description="Directory to store uploaded files",
     )
-    metadata_store: SqlStoreConfig = Field(
+    metadata_store: SqlStoreReference = Field(
         description="SQL store configuration for file metadata",
     )
     ttl_secs: int = 365 * 24 * 60 * 60  # 1 year
@@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel):
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
-            "metadata_store": SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="files_metadata.db",
-            ),
+            "metadata_store": SqlStoreReference(
+                backend="sql_default",
+                table_name="files_metadata",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl(
                 provider_id=self.__provider_id__,
                 metadata={
                     "embedding_dimension": 768,
-                    "default_configured": True,
                 },
                 model_type=ModelType.embedding,
             ),

View file

@@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig

 async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
-    from llama_stack.providers.remote.vector_io.chroma.chroma import (
-        ChromaVectorIOAdapter,
-    )
+    from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter

-    impl = ChromaVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,14 +8,14 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class ChromaVectorIOConfig(BaseModel):
     db_path: str
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend")
+    persistence: KVStoreReference = Field(description="Config for KV store backend")

     @classmethod
     def sample_run_config(
@@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel):
     ) -> dict[str, Any]:
         return {
             "db_path": db_path,
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="chroma_inline_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::chroma",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"

-    impl = FaissVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,22 +8,19 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class FaissVectorIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    persistence: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="faiss_store.db",
-            )
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::faiss",
+            ).model_dump(exclude_none=True)
         }

View file

@@ -17,27 +17,14 @@ from numpy.typing import NDArray

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-)
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-    VectorDBsProtocolPrivate,
-)
+from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    EmbeddingIndex,
-    VectorDBWithIndex,
-)
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex

 from .config import FaissVectorIOConfig
@@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex):
         await self._save_index()

-    async def query_vector(
-        self,
-        embedding: NDArray,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
         chunks = []
         scores = []
@@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex):
         return QueryChunksResponse(chunks=chunks, scores=scores)

-    async def query_keyword(
-        self,
-        query_string: str,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         raise NotImplementedError(
             "Keyword search is not supported - underlying DB FAISS does not support this search mode"
         )
@@ -200,21 +177,14 @@ class FaissIndex(EmbeddingIndex):

 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
-    def __init__(
-        self,
-        config: FaissVectorIOConfig,
-        inference_api: Inference,
-        models_api: Models,
-        files_api: Files | None,
-    ) -> None:
+    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.models_api = models_api
         self.cache: dict[str, VectorDBWithIndex] = {}

     async def initialize(self) -> None:
-        self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.kvstore = await kvstore_impl(self.config.persistence)
         # Load existing banks from kvstore
         start_key = VECTOR_DBS_PREFIX
         end_key = f"{VECTOR_DBS_PREFIX}\xff"
@@ -252,17 +222,11 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

-    async def register_vector_db(
-        self,
-        vector_db: VectorDB,
-    ) -> None:
+    async def register_vector_db(self, vector_db: VectorDB) -> None:
         assert self.kvstore is not None
         key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
-        await self.kvstore.set(
-            key=key,
-            value=vector_db.model_dump_json(),
-        )
+        await self.kvstore.set(key=key, value=vector_db.model_dump_json())

         # Store in cache
         self.cache[vector_db.identifier] = VectorDBWithIndex(
@@ -285,12 +249,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         del self.cache[vector_db_id]
         await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")

-    async def insert_chunks(
-        self,
-        vector_db_id: str,
-        chunks: list[Chunk],
-        ttl_seconds: int | None = None,
-    ) -> None:
+    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
         index = self.cache.get(vector_db_id)
         if index is None:
             raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
@@ -298,10 +257,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         await index.insert_chunks(chunks)

     async def query_chunks(
-        self,
-        vector_db_id: str,
-        query: InterleavedContent,
-        params: dict[str, Any] | None = None,
+        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
         index = self.cache.get(vector_db_id)
         if index is None:
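
The adapter persists each registered vector DB under a shared key prefix and enumerates them with a range scan whose end key appends a `\xff` sentinel, as `initialize()` above shows. A small sketch of that key layout; the concrete value of `VECTOR_DBS_PREFIX` is an assumption, since the constant's definition is not part of this hunk:

```python
VECTOR_DBS_PREFIX = "vector_dbs:"  # assumed value; defined elsewhere in the module

def key_for(identifier: str) -> str:
    return f"{VECTOR_DBS_PREFIX}{identifier}"

# Range-scan bounds as used in initialize(): every key with the prefix sorts
# between the bare prefix and the prefix followed by a high sentinel byte.
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"

key = key_for("my-db")
print(key, start_key <= key < end_key)  # vector_dbs:my-db True
```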

View file

@@ -14,11 +14,6 @@ from .config import MilvusVectorIOConfig

 async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter

-    impl = MilvusVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,25 +8,22 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class MilvusVectorIOConfig(BaseModel):
     db_path: str
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
     consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
             "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="milvus_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::milvus",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter

     assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = QdrantVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -9,23 +9,21 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class QdrantVectorIOConfig(BaseModel):
     path: str
-    kvstore: KVStoreConfig
+    persistence: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__, db_name="qdrant_registry.db"
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::qdrant",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
     from .sqlite_vec import SQLiteVecVectorIOAdapter

     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = SQLiteVecVectorIOAdapter(
-        config,
-        deps[Api.inference],
-        deps[Api.models],
-        deps.get(Api.files),
-    )
+    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
     await impl.initialize()
     return impl

View file

@@ -8,22 +8,19 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class SQLiteVectorIOConfig(BaseModel):
     db_path: str = Field(description="Path to the SQLite database file")
-    kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")

     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
             "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="sqlite_vec_registry.db",
-            ),
+            "persistence": KVStoreReference(
+                backend="kv_default",
+                namespace="vector_io::sqlite_vec",
+            ).model_dump(exclude_none=True),
         }

View file

@@ -17,13 +17,8 @@ from numpy.typing import NDArray

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-)
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex):
                     # Insert vector embeddings
                     embedding_data = [
-                        (
-                            (
-                                chunk.chunk_id,
-                                serialize_vector(emb.tolist()),
-                            )
-                        )
+                        ((chunk.chunk_id, serialize_vector(emb.tolist())))
                         for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
                     ]
-                    cur.executemany(
-                        f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);",
-                        embedding_data,
-                    )
+                    cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data)

                     # Insert FTS content
                     fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
                     # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
-                    cur.executemany(
-                        f"DELETE FROM [{self.fts_table}] WHERE id = ?;",
-                        [(row[0],) for row in fts_data],
-                    )
+                    cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data])

                     # INSERT new entries
-                    cur.executemany(
-                        f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);",
-                        fts_data,
-                    )
+                    cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data)

                     connection.commit()
@@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex):
         # Run batch insertion in a background thread
         await asyncio.to_thread(_execute_all_batch_inserts)

-    async def query_vector(
-        self,
-        embedding: NDArray,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         """
         Performs vector-based search using a virtual table for vector similarity.
         """
@@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex):
             scores.append(score)
         return QueryChunksResponse(chunks=chunks, scores=scores)

-    async def query_keyword(
-        self,
-        query_string: str,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         """
         Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search.
         """
@@ -410,22 +381,15 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
     and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
     """

-    def __init__(
-        self,
-        config,
-        inference_api: Inference,
-        models_api: Models,
-        files_api: Files | None,
-    ) -> None:
+    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.models_api = models_api
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.vector_db_store = None

     async def initialize(self) -> None:
-        self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.kvstore = await kvstore_impl(self.config.persistence)

         start_key = VECTOR_DBS_PREFIX
         end_key = f"{VECTOR_DBS_PREFIX}\xff"
@@ -433,9 +397,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         for db_json in stored_vector_dbs:
             vector_db = VectorDB.model_validate_json(db_json)
             index = await SQLiteVecIndex.create(
-                vector_db.embedding_dimension,
-                self.config.db_path,
-                vector_db.identifier,
+                vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
             )
             self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
@@ -450,11 +412,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         return [v.vector_db for v in self.cache.values()]

     async def register_vector_db(self, vector_db: VectorDB) -> None:
-        index = await SQLiteVecIndex.create(
-            vector_db.embedding_dimension,
-            self.config.db_path,
-            vector_db.identifier,
-        )
+        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
         self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)

     async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
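
The batch-insert path above deletes FTS rows by id before re-inserting them because FTS5 virtual tables do not support ON CONFLICT. A standalone sqlite3 sketch of that upsert pattern (the table name is illustrative):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE VIRTUAL TABLE fts_chunks USING fts5(id, content)")

fts_data = [("chunk-1", "hello world"), ("chunk-2", "vector search with sqlite")]

# DELETE existing entries with the same ids, then INSERT fresh rows.
cur.executemany("DELETE FROM fts_chunks WHERE id = ?;", [(row[0],) for row in fts_data])
cur.executemany("INSERT INTO fts_chunks (id, content) VALUES (?, ?);", fts_data)
conn.commit()

# Relevance-ranked full-text query, as query_keyword() performs.
print(cur.execute("SELECT id FROM fts_chunks WHERE fts_chunks MATCH 'vector'").fetchall())
```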

View file

@@ -7,20 +7,17 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.providers.utils.kvstore.config import (
-    KVStoreConfig,
-    SqliteKVStoreConfig,
-)
+from llama_stack.core.storage.datatypes import KVStoreReference


 class HuggingfaceDatasetIOConfig(BaseModel):
-    kvstore: KVStoreConfig
+    kvstore: KVStoreReference

     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "kvstore": SqliteKVStoreConfig.sample_run_config(
-                __distro_dir__=__distro_dir__,
-                db_name="huggingface_datasetio.db",
-            )
+            "kvstore": KVStoreReference(
+                backend="kv_default",
+                namespace="datasetio::huggingface",
+            ).model_dump(exclude_none=True)
         }

Some files were not shown because too many files have changed in this diff.