Merge branch 'main' into add_max_tool_calls

2025-12-03 09:53:45 +00:00 · 2025-11-10 12:01:32 -08:00 · 2025-11-10 12:01:32 -08:00 · dfa2210319
commit dfa2210319
parent 835e6c60ad fadf17daf3
25 changed files with 1889 additions and 1700 deletions
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@ -998,39 +998,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Model.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Model'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Register model.
-      description: >-
-        Register model.
-
-        Register a model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterModelRequest'
-        required: true
-      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1065,36 +1032,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Unregister model.
-      description: >-
-        Unregister model.
-
-        Unregister a model.
-      parameters:
-        - name: model_id
-          in: path
-          description: >-
-            The identifier of the model to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1725,32 +1662,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Register a scoring function.
-      description: Register a scoring function.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
-        required: true
-      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1782,33 +1693,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Unregister a scoring function.
-      description: Unregister a scoring function.
-      parameters:
-        - name: scoring_fn_id
-          in: path
-          description: >-
-            The ID of the scoring function to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1897,36 +1781,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Shield.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Shield'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Register a shield.
-      description: Register a shield.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterShieldRequest'
-        required: true
-      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1958,33 +1812,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Unregister a shield.
-      description: Unregister a shield.
-      parameters:
-        - name: identifier
-          in: path
-          description: >-
-            The identifier of the shield to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2080,32 +1907,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Register a tool group.
-      description: Register a tool group.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterToolGroupRequest'
-        required: true
-      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2137,32 +1938,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Unregister a tool group.
-      description: Unregister a tool group.
-      parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2916,11 +2691,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -3171,7 +2946,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -3228,7 +3003,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -3279,7 +3054,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -3336,7 +3111,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -6280,46 +6055,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
-    ModelType:
-      type: string
-      enum:
-        - llm
-        - embedding
-        - rerank
-      title: ModelType
-      description: >-
-        Enumeration of supported model types in Llama Stack.
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -6377,6 +6112,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
+    ModelType:
+      type: string
+      enum:
+        - llm
+        - embedding
+        - rerank
+      title: ModelType
+      description: >-
+        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -9130,61 +8874,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
-    ParamType:
-      oneOf:
-        - $ref: '#/components/schemas/StringType'
-        - $ref: '#/components/schemas/NumberType'
-        - $ref: '#/components/schemas/BooleanType'
-        - $ref: '#/components/schemas/ArrayType'
-        - $ref: '#/components/schemas/ObjectType'
-        - $ref: '#/components/schemas/JsonType'
-        - $ref: '#/components/schemas/UnionType'
-        - $ref: '#/components/schemas/ChatCompletionInputType'
-        - $ref: '#/components/schemas/CompletionInputType'
-      discriminator:
-        propertyName: type
-        mapping:
-          string: '#/components/schemas/StringType'
-          number: '#/components/schemas/NumberType'
-          boolean: '#/components/schemas/BooleanType'
-          array: '#/components/schemas/ArrayType'
-          object: '#/components/schemas/ObjectType'
-          json: '#/components/schemas/JsonType'
-          union: '#/components/schemas/UnionType'
-          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
-          completion_input: '#/components/schemas/CompletionInputType'
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -9360,35 +9049,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -9649,37 +9309,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -10480,41 +10109,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
          description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
@ -10831,68 +10454,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset. One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -10960,47 +10521,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
    BenchmarkConfig:
      type: object
      properties:
@ -11862,6 +11382,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
+    DataSource:
+      oneOf:
+        - $ref: '#/components/schemas/URIDataSource'
+        - $ref: '#/components/schemas/RowsDataSource'
+      discriminator:
+        propertyName: type
+        mapping:
+          uri: '#/components/schemas/URIDataSource'
+          rows: '#/components/schemas/RowsDataSource'
+    RegisterDatasetRequest:
+      type: object
+      properties:
+        purpose:
+          type: string
+          enum:
+            - post-training/messages
+            - eval/question-answer
+            - eval/messages-answer
+          description: >-
+            The purpose of the dataset. One of: - "post-training/messages": The dataset
+            contains a messages column with list of messages for post-training. {
+            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
+            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
+            contains a question column and an answer column for evaluation. { "question":
+            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
+            The dataset contains a messages column with list of messages and an answer
+            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
+            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
+            Doe. How can I help you today?"}, {"role": "user", "content": "What's
+            my name?"}, ], "answer": "John Doe" }
+        source:
+          $ref: '#/components/schemas/DataSource'
+          description: >-
+            The data source of the dataset. Ensure that the data source schema is
+            compatible with the purpose of the dataset. Examples: - { "type": "uri",
+            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
+            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
+            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
+            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
+            } ] }
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The metadata for the dataset. - E.g. {"description": "My dataset"}.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset. If not provided, an ID will be generated.
+      additionalProperties: false
+      required:
+        - purpose
+        - source
+      title: RegisterDatasetRequest
+    RegisterBenchmarkRequest:
+      type: object
+      properties:
+        benchmark_id:
+          type: string
+          description: The ID of the benchmark to register.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset to use for the benchmark.
+        scoring_functions:
+          type: array
+          items:
+            type: string
+          description: >-
+            The scoring functions to use for the benchmark.
+        provider_benchmark_id:
+          type: string
+          description: >-
+            The ID of the provider benchmark to use for the benchmark.
+        provider_id:
+          type: string
+          description: >-
+            The ID of the provider to use for the benchmark.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The metadata to use for the benchmark.
+      additionalProperties: false
+      required:
+        - benchmark_id
+        - dataset_id
+        - scoring_functions
+      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/docs/docs/deploying/kubernetes_deployment.mdx
+++ b/docs/docs/deploying/kubernetes_deployment.mdx
@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';

 # Kubernetes Deployment Guide

-Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS.
+Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually.

 ## Prerequisites

@ -110,115 +110,176 @@ spec:
 EOF
 ```

-### Step 3: Configure Llama Stack
+### Step 3: Install Kubernetes Operator

-Update your run configuration:
-
-```yaml
-providers:
-  inference:
-  - provider_id: vllm
-    provider_type: remote::vllm
-    config:
-      url: http://vllm-server.default.svc.cluster.local:8000/v1
-      max_tokens: 4096
-      api_token: fake
-```
-
-Build container image:
+Install the Llama Stack Kubernetes operator to manage Llama Stack deployments:

 ```bash
-tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <<EOF
-FROM distribution-myenv:dev
-RUN apt-get update && apt-get install -y git
-RUN git clone https://github.com/meta-llama/llama-stack.git /app/llama-stack-source
-ADD ./vllm-llama-stack-run-k8s.yaml /app/config.yaml
-EOF
-podman build -f $tmp_dir/Containerfile.llama-stack-run-k8s -t llama-stack-run-k8s $tmp_dir
+# Install from the latest main branch
+kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml
+
+# Or install a specific version (e.g., v0.4.0)
+# kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/v0.4.0/release/operator.yaml
 ```

-### Step 4: Deploy Llama Stack Server
+Verify the operator is running:
+
+```bash
+kubectl get pods -n llama-stack-operator-system
+```
+
+For more information about the operator, see the [llama-stack-k8s-operator repository](https://github.com/llamastack/llama-stack-k8s-operator).
+
+### Step 4: Deploy Llama Stack Server using Operator
+
+Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
+You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).

 ```yaml
 cat <<EOF | kubectl apply -f -
-apiVersion: v1
-kind: PersistentVolumeClaim
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
 metadata:
-  name: llama-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llama-stack-server
+  name: llamastack-vllm
 spec:
  replicas: 1
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: llama-stack
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: llama-stack
-    spec:
-      containers:
-      - name: llama-stack
-        image: localhost/llama-stack-run-k8s:latest
-        imagePullPolicy: IfNotPresent
-        command: ["llama", "stack", "run", "/app/config.yaml"]
-        ports:
-          - containerPort: 5000
-        volumeMounts:
-          - name: llama-storage
-            mountPath: /root/.llama
-      volumes:
-      - name: llama-storage
-        persistentVolumeClaim:
-          claimName: llama-pvc
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: llama-stack-service
-spec:
-  selector:
-    app.kubernetes.io/name: llama-stack
-  ports:
-  - protocol: TCP
-    port: 5000
-    targetPort: 5000
-  type: ClusterIP
+  server:
+    distribution:
+      name: starter
+    containerSpec:
+      port: 8321
+      env:
+      - name: VLLM_URL
+        value: "http://vllm-server.default.svc.cluster.local:8000/v1"
+      - name: VLLM_MAX_TOKENS
+        value: "4096"
+      - name: VLLM_API_TOKEN
+        value: "fake"
+    # Optional: override run.yaml from a ConfigMap using userConfig
+    userConfig:
+      configMap:
+        name: llama-stack-config
+    storage:
+      size: "20Gi"
+      mountPath: "/home/lls/.lls"
 EOF
 ```

+**Configuration Options:**
+
+- `replicas`: Number of Llama Stack server instances to run
+- `server.distribution.name`: The distribution to use (e.g., `starter` for the starter distribution). See the [list of supported distributions](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository.
+- `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
+- `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
+- `server.containerSpec.env`: Environment variables to configure providers:
+- `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
+- `server.storage.size`: Size of the persistent volume for model and data storage
+- `server.storage.mountPath`: Where to mount the storage in the container
+
+**Note:** For a complete list of supported distributions, see [distributions.json](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository. To use a custom or non-supported distribution, set the `server.distribution.image` field with your container image instead of  `server.distribution.name`.
+
+The operator automatically creates:
+- A Deployment for the Llama Stack server
+- A Service to access the server
+- A PersistentVolumeClaim for storage
+- All necessary RBAC resources
+
+
+Check the status of your deployment:
+
+```bash
+kubectl get llamastackdistribution
+kubectl describe llamastackdistribution llamastack-vllm
+```
+
 ### Step 5: Test Deployment

+Wait for the Llama Stack server pod to be ready:
+
 ```bash
-# Port forward and test
-kubectl port-forward service/llama-stack-service 5000:5000
-llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
+# Check the status of the LlamaStackDistribution
+kubectl get llamastackdistribution llamastack-vllm
+
+# Check the pods created by the operator
+kubectl get pods -l app.kubernetes.io/name=llama-stack
+
+# Wait for the pod to be ready
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=llama-stack --timeout=300s
+```
+
+Get the service name created by the operator (it typically follows the pattern `<llamastackdistribution-name>-service`):
+
+```bash
+# List services to find the service name
+kubectl get services | grep llamastack
+
+# Port forward and test (replace SERVICE_NAME with the actual service name)
+kubectl port-forward service/llamastack-vllm-service 8321:8321
+```
+
+In another terminal, test the deployment:
+
+```bash
+llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?"
 ```

 ## Troubleshooting

-**Check pod status:**
+### vLLM Server Issues
+
+**Check vLLM pod status:**
 ```bash
 kubectl get pods -l app.kubernetes.io/name=vllm
 kubectl logs -l app.kubernetes.io/name=vllm
 ```

-**Test service connectivity:**
+**Test vLLM service connectivity:**
 ```bash
 kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
 ```

+### Llama Stack Server Issues
+
+**Check LlamaStackDistribution status:**
+```bash
+# Get detailed status
+kubectl describe llamastackdistribution llamastack-vllm
+
+# Check for events
+kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm
+```
+
+**Check operator-managed pods:**
+```bash
+# List all pods managed by the operator
+kubectl get pods -l app.kubernetes.io/name=llama-stack
+
+# Check pod logs (replace POD_NAME with actual pod name)
+kubectl logs -l app.kubernetes.io/name=llama-stack
+```
+
+**Check operator status:**
+```bash
+# Verify the operator is running
+kubectl get pods -n llama-stack-operator-system
+
+# Check operator logs if issues persist
+kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager
+```
+
+**Verify service connectivity:**
+```bash
+# Get the service endpoint
+kubectl get svc llamastack-vllm-service
+
+# Test connectivity from within the cluster
+kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health
+```
+
 ## Related Resources

 - **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
 - **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
 - **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
+- **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of llama-stack kubernetes operator
+- **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API Spec of the llama-stack operator Custom Resource.
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -162,7 +162,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -219,7 +219,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -270,7 +270,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -327,7 +327,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -936,68 +936,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset. One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -1065,47 +1003,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
    AggregationFunctionType:
      type: string
      enum:
@ -2254,6 +2151,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
+    DataSource:
+      oneOf:
+        - $ref: '#/components/schemas/URIDataSource'
+        - $ref: '#/components/schemas/RowsDataSource'
+      discriminator:
+        propertyName: type
+        mapping:
+          uri: '#/components/schemas/URIDataSource'
+          rows: '#/components/schemas/RowsDataSource'
+    RegisterDatasetRequest:
+      type: object
+      properties:
+        purpose:
+          type: string
+          enum:
+            - post-training/messages
+            - eval/question-answer
+            - eval/messages-answer
+          description: >-
+            The purpose of the dataset. One of: - "post-training/messages": The dataset
+            contains a messages column with list of messages for post-training. {
+            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
+            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
+            contains a question column and an answer column for evaluation. { "question":
+            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
+            The dataset contains a messages column with list of messages and an answer
+            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
+            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
+            Doe. How can I help you today?"}, {"role": "user", "content": "What's
+            my name?"}, ], "answer": "John Doe" }
+        source:
+          $ref: '#/components/schemas/DataSource'
+          description: >-
+            The data source of the dataset. Ensure that the data source schema is
+            compatible with the purpose of the dataset. Examples: - { "type": "uri",
+            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
+            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
+            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
+            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
+            } ] }
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The metadata for the dataset. - E.g. {"description": "My dataset"}.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset. If not provided, an ID will be generated.
+      additionalProperties: false
+      required:
+        - purpose
+        - source
+      title: RegisterDatasetRequest
+    RegisterBenchmarkRequest:
+      type: object
+      properties:
+        benchmark_id:
+          type: string
+          description: The ID of the benchmark to register.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset to use for the benchmark.
+        scoring_functions:
+          type: array
+          items:
+            type: string
+          description: >-
+            The scoring functions to use for the benchmark.
+        provider_benchmark_id:
+          type: string
+          description: >-
+            The ID of the provider benchmark to use for the benchmark.
+        provider_id:
+          type: string
+          description: >-
+            The ID of the provider to use for the benchmark.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The metadata to use for the benchmark.
+      additionalProperties: false
+      required:
+        - benchmark_id
+        - dataset_id
+        - scoring_functions
+      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -995,39 +995,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Model.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Model'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Register model.
-      description: >-
-        Register model.
-
-        Register a model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterModelRequest'
-        required: true
-      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1062,36 +1029,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Unregister model.
-      description: >-
-        Unregister model.
-
-        Unregister a model.
-      parameters:
-        - name: model_id
-          in: path
-          description: >-
-            The identifier of the model to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1722,32 +1659,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Register a scoring function.
-      description: Register a scoring function.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
-        required: true
-      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1779,33 +1690,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Unregister a scoring function.
-      description: Unregister a scoring function.
-      parameters:
-        - name: scoring_fn_id
-          in: path
-          description: >-
-            The ID of the scoring function to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1894,36 +1778,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Shield.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Shield'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Register a shield.
-      description: Register a shield.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterShieldRequest'
-        required: true
-      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1955,33 +1809,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Unregister a shield.
-      description: Unregister a shield.
-      parameters:
-        - name: identifier
-          in: path
-          description: >-
-            The identifier of the shield to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2077,32 +1904,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Register a tool group.
-      description: Register a tool group.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterToolGroupRequest'
-        required: true
-      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2134,32 +1935,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Unregister a tool group.
-      description: Unregister a tool group.
-      parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2913,11 +2688,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -5564,46 +5339,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
-    ModelType:
-      type: string
-      enum:
-        - llm
-        - embedding
-        - rerank
-      title: ModelType
-      description: >-
-        Enumeration of supported model types in Llama Stack.
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -5661,6 +5396,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
+    ModelType:
+      type: string
+      enum:
+        - llm
+        - embedding
+        - rerank
+      title: ModelType
+      description: >-
+        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -8414,61 +8158,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
-    ParamType:
-      oneOf:
-        - $ref: '#/components/schemas/StringType'
-        - $ref: '#/components/schemas/NumberType'
-        - $ref: '#/components/schemas/BooleanType'
-        - $ref: '#/components/schemas/ArrayType'
-        - $ref: '#/components/schemas/ObjectType'
-        - $ref: '#/components/schemas/JsonType'
-        - $ref: '#/components/schemas/UnionType'
-        - $ref: '#/components/schemas/ChatCompletionInputType'
-        - $ref: '#/components/schemas/CompletionInputType'
-      discriminator:
-        propertyName: type
-        mapping:
-          string: '#/components/schemas/StringType'
-          number: '#/components/schemas/NumberType'
-          boolean: '#/components/schemas/BooleanType'
-          array: '#/components/schemas/ArrayType'
-          object: '#/components/schemas/ObjectType'
-          json: '#/components/schemas/JsonType'
-          union: '#/components/schemas/UnionType'
-          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
-          completion_input: '#/components/schemas/CompletionInputType'
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -8644,35 +8333,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -8933,37 +8593,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -9764,41 +9393,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
          description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -998,39 +998,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Model.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Model'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Register model.
-      description: >-
-        Register model.
-
-        Register a model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterModelRequest'
-        required: true
-      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1065,36 +1032,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Models
-      summary: Unregister model.
-      description: >-
-        Unregister model.
-
-        Unregister a model.
-      parameters:
-        - name: model_id
-          in: path
-          description: >-
-            The identifier of the model to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1725,32 +1662,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Register a scoring function.
-      description: Register a scoring function.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
-        required: true
-      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1782,33 +1693,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Unregister a scoring function.
-      description: Unregister a scoring function.
-      parameters:
-        - name: scoring_fn_id
-          in: path
-          description: >-
-            The ID of the scoring function to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1897,36 +1781,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Shield.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Shield'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Register a shield.
-      description: Register a shield.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterShieldRequest'
-        required: true
-      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1958,33 +1812,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Unregister a shield.
-      description: Unregister a shield.
-      parameters:
-        - name: identifier
-          in: path
-          description: >-
-            The identifier of the shield to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2080,32 +1907,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Register a tool group.
-      description: Register a tool group.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterToolGroupRequest'
-        required: true
-      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2137,32 +1938,6 @@ paths:
          schema:
            type: string
      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Unregister a tool group.
-      description: Unregister a tool group.
-      parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2916,11 +2691,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -3171,7 +2946,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -3228,7 +3003,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -3279,7 +3054,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -3336,7 +3111,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -6280,46 +6055,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
-    ModelType:
-      type: string
-      enum:
-        - llm
-        - embedding
-        - rerank
-      title: ModelType
-      description: >-
-        Enumeration of supported model types in Llama Stack.
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -6377,6 +6112,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
+    ModelType:
+      type: string
+      enum:
+        - llm
+        - embedding
+        - rerank
+      title: ModelType
+      description: >-
+        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -9130,61 +8874,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
-    ParamType:
-      oneOf:
-        - $ref: '#/components/schemas/StringType'
-        - $ref: '#/components/schemas/NumberType'
-        - $ref: '#/components/schemas/BooleanType'
-        - $ref: '#/components/schemas/ArrayType'
-        - $ref: '#/components/schemas/ObjectType'
-        - $ref: '#/components/schemas/JsonType'
-        - $ref: '#/components/schemas/UnionType'
-        - $ref: '#/components/schemas/ChatCompletionInputType'
-        - $ref: '#/components/schemas/CompletionInputType'
-      discriminator:
-        propertyName: type
-        mapping:
-          string: '#/components/schemas/StringType'
-          number: '#/components/schemas/NumberType'
-          boolean: '#/components/schemas/BooleanType'
-          array: '#/components/schemas/ArrayType'
-          object: '#/components/schemas/ObjectType'
-          json: '#/components/schemas/JsonType'
-          union: '#/components/schemas/UnionType'
-          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
-          completion_input: '#/components/schemas/CompletionInputType'
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -9360,35 +9049,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -9649,37 +9309,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -10480,41 +10109,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
          description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
@ -10831,68 +10454,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset. One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -10960,47 +10521,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
    BenchmarkConfig:
      type: object
      properties:
@ -11862,6 +11382,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
+    DataSource:
+      oneOf:
+        - $ref: '#/components/schemas/URIDataSource'
+        - $ref: '#/components/schemas/RowsDataSource'
+      discriminator:
+        propertyName: type
+        mapping:
+          uri: '#/components/schemas/URIDataSource'
+          rows: '#/components/schemas/RowsDataSource'
+    RegisterDatasetRequest:
+      type: object
+      properties:
+        purpose:
+          type: string
+          enum:
+            - post-training/messages
+            - eval/question-answer
+            - eval/messages-answer
+          description: >-
+            The purpose of the dataset. One of: - "post-training/messages": The dataset
+            contains a messages column with list of messages for post-training. {
+            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
+            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
+            contains a question column and an answer column for evaluation. { "question":
+            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
+            The dataset contains a messages column with list of messages and an answer
+            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
+            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
+            Doe. How can I help you today?"}, {"role": "user", "content": "What's
+            my name?"}, ], "answer": "John Doe" }
+        source:
+          $ref: '#/components/schemas/DataSource'
+          description: >-
+            The data source of the dataset. Ensure that the data source schema is
+            compatible with the purpose of the dataset. Examples: - { "type": "uri",
+            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
+            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
+            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
+            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
+            } ] }
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The metadata for the dataset. - E.g. {"description": "My dataset"}.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset. If not provided, an ID will be generated.
+      additionalProperties: false
+      required:
+        - purpose
+        - source
+      title: RegisterDatasetRequest
+    RegisterBenchmarkRequest:
+      type: object
+      properties:
+        benchmark_id:
+          type: string
+          description: The ID of the benchmark to register.
+        dataset_id:
+          type: string
+          description: >-
+            The ID of the dataset to use for the benchmark.
+        scoring_functions:
+          type: array
+          items:
+            type: string
+          description: >-
+            The scoring functions to use for the benchmark.
+        provider_benchmark_id:
+          type: string
+          description: >-
+            The ID of the provider benchmark to use for the benchmark.
+        provider_id:
+          type: string
+          description: >-
+            The ID of the provider to use for the benchmark.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: The metadata to use for the benchmark.
+      additionalProperties: false
+      required:
+        - benchmark_id
+        - dataset_id
+        - scoring_functions
+      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/src/llama_stack/apis/benchmarks/benchmarks.py
+++ b/src/llama_stack/apis/benchmarks/benchmarks.py
@ -74,7 +74,7 @@ class Benchmarks(Protocol):
        """
        ...

-    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def register_benchmark(
        self,
        benchmark_id: str,
@ -95,7 +95,7 @@ class Benchmarks(Protocol):
        """
        ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark.

--- a/src/llama_stack/apis/datasets/datasets.py
+++ b/src/llama_stack/apis/datasets/datasets.py
@ -146,7 +146,7 @@ class ListDatasetsResponse(BaseModel):


 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def register_dataset(
        self,
        purpose: DatasetPurpose,
@ -235,7 +235,7 @@ class Datasets(Protocol):
        """
        ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def unregister_dataset(
        self,
        dataset_id: str,
--- a/src/llama_stack/apis/models/models.py
+++ b/src/llama_stack/apis/models/models.py
@ -136,7 +136,7 @@ class Models(Protocol):
        """
        ...

-    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_model(
        self,
        model_id: str,
@ -158,7 +158,7 @@ class Models(Protocol):
        """
        ...

-    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_model(
        self,
        model_id: str,
--- a/src/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/src/llama_stack/apis/scoring_functions/scoring_functions.py
@ -178,7 +178,7 @@ class ScoringFunctions(Protocol):
        """
        ...

-    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_scoring_function(
        self,
        scoring_fn_id: str,
@ -199,7 +199,9 @@ class ScoringFunctions(Protocol):
        """
        ...

-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
+    )
    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
        """Unregister a scoring function.

--- a/src/llama_stack/apis/shields/shields.py
+++ b/src/llama_stack/apis/shields/shields.py
@ -67,7 +67,7 @@ class Shields(Protocol):
        """
        ...

-    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_shield(
        self,
        shield_id: str,
@ -85,7 +85,7 @@ class Shields(Protocol):
        """
        ...

-    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_shield(self, identifier: str) -> None:
        """Unregister a shield.

--- a/src/llama_stack/apis/tools/tools.py
+++ b/src/llama_stack/apis/tools/tools.py
@ -109,7 +109,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable
@telemetry_traceable
 class ToolGroups(Protocol):
-    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_tool_group(
        self,
        toolgroup_id: str,
@ -167,7 +167,7 @@ class ToolGroups(Protocol):
        """
        ...

-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_toolgroup(
        self,
        toolgroup_id: str,
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):


@json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.

-    :param file_id: Unique identifier for the file
-    :param filename: Name of the file
-    :param attributes: Key-value attributes associated with the file
-    :param content: List of content items from the file
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
    """

-    file_id: str
-    filename: str
-    attributes: dict[str, Any]
-    content: list[VectorStoreContent]
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool
+    next_page: str | None = None


@json_schema_type
@ -732,12 +732,12 @@ class VectorIO(Protocol):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to retrieve.
        :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :returns: A VectorStoreFileContentResponse representing the file contents.
        """
        ...

--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreChunkingStrategyStaticConfig,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,
@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
        provider = await self.routing_table.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store_file_contents(
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
    SearchRankingOptions,
    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFileStatus,
@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        await self.assert_action_allowed("read", "vector_store", vector_store_id)
        provider = await self.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store_file_contents(
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

    async def register_vector_store(self, vector_store: VectorStore) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")

        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
        return [i.vector_store for i in self.cache.values()]

    async def unregister_vector_store(self, vector_store_id: str) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")

        if vector_store_id not in self.cache:
            return
@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
        del self.cache[vector_store_id]
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
+            raise VectorStoreNotFoundError(vector_store_id)
+
+        vector_store = VectorStore.model_validate_json(vector_store_data)
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
+            inference_api=self.inference_api,
+        )
+        self.cache[vector_store_id] = index
+        return index
+
    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
        index = self.cache.get(vector_store_id)
        if index is None:
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
        return [v.vector_store for v in self.cache.values()]

    async def register_vector_store(self, vector_store: VectorStore) -> None:
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
+
+        # Save to kvstore for persistence
+        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
+        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
+
+        # Create and cache the index
        index = await SQLiteVecIndex.create(
            vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
        )
@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        if self.vector_store_table is None:
-            raise VectorStoreNotFoundError(vector_store_id)
-
-        vector_store = self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

+        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=SQLiteVecIndex(
--- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
-        self.vector_store_table = self.kvstore

        if isinstance(self.config, RemoteChromaVectorIOConfig):
            log.info(f"Connecting to Chroma server at: {self.config.url}")
@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
+
+        vector_store = VectorStore.model_validate_json(vector_store_data)
        collection = await maybe_await(self.client.get_collection(vector_store_id))
        if not collection:
            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
--- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        if self.vector_store_table is None:
-            raise VectorStoreNotFoundError(vector_store_id)
-
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

+        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
--- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@ -368,6 +368,22 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
            log.exception("Could not connect to PGVector database server")
            raise RuntimeError("Could not connect to PGVector database server") from e

+        # Load existing vector stores from KV store into cache
+        start_key = VECTOR_DBS_PREFIX
+        end_key = f"{VECTOR_DBS_PREFIX}\xff"
+        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
+        for vector_store_data in stored_vector_stores:
+            vector_store = VectorStore.model_validate_json(vector_store_data)
+            pgvector_index = PGVectorIndex(
+                vector_store=vector_store,
+                dimension=vector_store.embedding_dimension,
+                conn=self.conn,
+                kvstore=self.kvstore,
+            )
+            await pgvector_index.initialize()
+            index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
+            self.cache[vector_store.identifier] = index
+
    async def shutdown(self) -> None:
        if self.conn is not None:
            self.conn.close()
@ -377,7 +393,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        # Persist vector DB metadata in the KV store
-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
+
+        # Save to kvstore for persistence
+        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
+        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
+
        # Upsert model metadata in Postgres
        upsert_models(self.conn, [(vector_store.identifier, vector_store)])

@ -396,7 +418,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
            del self.cache[vector_store_id]

        # Delete vector DB metadata from KV store
-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
        await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
@ -413,13 +436,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        if self.vector_store_table is None:
-            raise VectorStoreNotFoundError(vector_store_id)
-
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

+        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
        await index.initialize()
        self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
--- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -183,7 +183,8 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        await super().shutdown()

    async def register_vector_store(self, vector_store: VectorStore) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())

@ -200,20 +201,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

-        assert self.kvstore is not None
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        if self.vector_store_table is None:
-            raise ValueError(f"Vector DB not found {vector_store_id}")
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")

-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

+        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@ -346,13 +346,16 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]

-        if self.vector_store_table is None:
-            raise VectorStoreNotFoundError(vector_store_id)
-
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
-        if not vector_store:
+        # Try to load from kvstore
+        if self.kvstore is None:
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
+
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
+        vector_store_data = await self.kvstore.get(key)
+        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)

+        vector_store = VectorStore.model_validate_json(vector_store_data)
        client = self._get_client()
        sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
        if not client.collections.exists(sanitized_collection_name):
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreContent,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
    VectorStoreFileLastError,
@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file."""
        if vector_store_id not in self.openai_vector_stores:
            raise VectorStoreNotFoundError(vector_store_id)

-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
        dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
        chunks = [Chunk.model_validate(c) for c in dict_chunks]
        content = []
        for chunk in chunks:
            content.extend(self._chunk_to_vector_store_content(chunk))
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            object="vector_store.file_content.page",
+            data=content,
+            has_more=False,
+            next_page=None,
        )

    async def openai_update_vector_store_file(
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
    )

    assert file_contents is not None
-    assert len(file_contents.content) == 1
-    content = file_contents.content[0]
+    assert file_contents.object == "vector_store.file_content.page"
+    assert len(file_contents.data) == 1
+    content = file_contents.data[0]

    # llama-stack-client returns a model, openai-python is a badboy and returns a dict
    if not isinstance(content, dict):
        content = content.model_dump()
    assert content["type"] == "text"
    assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
-    assert file_contents.attributes == attributes
+    assert file_contents.has_more is False


@vector_provider_wrapper
@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
        )

        assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
-        assert len(file_contents.content) > 0
+        assert file_contents.object == "vector_store.file_content.page"
+        assert len(file_contents.data) > 0

        # Verify the content matches what we uploaded
        content_text = (
-            file_contents.content[0].text
-            if hasattr(file_contents.content[0], "text")
-            else file_contents.content[0]["text"]
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
        )
        assert file_data[i][1].decode("utf-8") in content_text

--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@ -92,6 +92,99 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
    await vector_io_adapter.shutdown()


+async def test_vector_store_lazy_loading_from_kvstore(vector_io_adapter):
+    """
+    Test that vector stores can be lazy-loaded from KV store when not in cache.
+
+    Verifies that clearing the cache doesn't break vector store access - they
+    can be loaded on-demand from persistent storage.
+    """
+    await vector_io_adapter.initialize()
+
+    vector_store_id = f"lazy_load_test_{np.random.randint(1e6)}"
+    vector_store = VectorStore(
+        identifier=vector_store_id,
+        provider_id="test_provider",
+        embedding_model="test_model",
+        embedding_dimension=128,
+    )
+    await vector_io_adapter.register_vector_store(vector_store)
+    assert vector_store_id in vector_io_adapter.cache
+
+    vector_io_adapter.cache.clear()
+    assert vector_store_id not in vector_io_adapter.cache
+
+    loaded_index = await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
+    assert loaded_index is not None
+    assert loaded_index.vector_store.identifier == vector_store_id
+    assert vector_store_id in vector_io_adapter.cache
+
+    cached_index = await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
+    assert cached_index is loaded_index
+
+    await vector_io_adapter.shutdown()
+
+
+async def test_vector_store_preloading_on_initialization(vector_io_adapter):
+    """
+    Test that vector stores are preloaded from KV store during initialization.
+
+    Verifies that after restart, all vector stores are automatically loaded into
+    cache and immediately accessible without requiring lazy loading.
+    """
+    await vector_io_adapter.initialize()
+
+    vector_store_ids = [f"preload_test_{i}_{np.random.randint(1e6)}" for i in range(3)]
+    for vs_id in vector_store_ids:
+        vector_store = VectorStore(
+            identifier=vs_id,
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=128,
+        )
+        await vector_io_adapter.register_vector_store(vector_store)
+
+    for vs_id in vector_store_ids:
+        assert vs_id in vector_io_adapter.cache
+
+    await vector_io_adapter.shutdown()
+    await vector_io_adapter.initialize()
+
+    for vs_id in vector_store_ids:
+        assert vs_id in vector_io_adapter.cache
+
+    for vs_id in vector_store_ids:
+        loaded_index = await vector_io_adapter._get_and_cache_vector_store_index(vs_id)
+        assert loaded_index is not None
+        assert loaded_index.vector_store.identifier == vs_id
+
+    await vector_io_adapter.shutdown()
+
+
+async def test_kvstore_none_raises_runtime_error(vector_io_adapter):
+    """
+    Test that accessing vector stores with uninitialized kvstore raises RuntimeError.
+
+    Verifies proper RuntimeError is raised instead of assertions when kvstore is None.
+    """
+    await vector_io_adapter.initialize()
+
+    vector_store_id = f"kvstore_none_test_{np.random.randint(1e6)}"
+    vector_store = VectorStore(
+        identifier=vector_store_id,
+        provider_id="test_provider",
+        embedding_model="test_model",
+        embedding_dimension=128,
+    )
+    await vector_io_adapter.register_vector_store(vector_store)
+
+    vector_io_adapter.cache.clear()
+    vector_io_adapter.kvstore = None
+
+    with pytest.raises(RuntimeError, match="KVStore not initialized"):
+        await vector_io_adapter._get_and_cache_vector_store_index(vector_store_id)
+
+
 async def test_register_and_unregister_vector_store(vector_io_adapter):
    unique_id = f"foo_db_{np.random.randint(1e6)}"
    dummy = VectorStore(