Merge branch 'main' into add-mcp-authentication-param

2025-12-03 09:53:45 +00:00 · 2025-11-10 13:19:12 -08:00 · 2025-11-10 13:19:12 -08:00 · 114ab693a5
commit 114ab693a5
parent 6716e128be 209a78b618
40 changed files with 2827 additions and 1700 deletions
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@ -998,39 +998,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Model.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Model'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Register model.
      description: >-
        Register model.
        Register a model.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterModelRequest'
        required: true
      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1065,36 +1032,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Unregister model.
      description: >-
        Unregister model.
        Unregister a model.
      parameters:
        - name: model_id
          in: path
          description: >-
            The identifier of the model to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1725,32 +1662,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Register a scoring function.
      description: Register a scoring function.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
        required: true
      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1782,33 +1693,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Unregister a scoring function.
      description: Unregister a scoring function.
      parameters:
        - name: scoring_fn_id
          in: path
          description: >-
            The ID of the scoring function to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1897,36 +1781,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Shield.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Shield'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Register a shield.
      description: Register a shield.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterShieldRequest'
        required: true
      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1958,33 +1812,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Unregister a shield.
      description: Unregister a shield.
      parameters:
        - name: identifier
          in: path
          description: >-
            The identifier of the shield to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2080,32 +1907,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Register a tool group.
      description: Register a tool group.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterToolGroupRequest'
        required: true
      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2137,32 +1938,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Unregister a tool group.
      description: Unregister a tool group.
      parameters:
        - name: toolgroup_id
          in: path
          description: The ID of the tool group to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2916,11 +2691,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -3171,7 +2946,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -3228,7 +3003,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -3279,7 +3054,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -3336,7 +3111,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -6280,46 +6055,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RegisterModelRequest:
      type: object
      properties:
        model_id:
          type: string
          description: The identifier of the model to register.
        provider_model_id:
          type: string
          description: >-
            The identifier of the model in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: Any additional metadata for this model.
        model_type:
          $ref: '#/components/schemas/ModelType'
          description: The type of model to register.
      additionalProperties: false
      required:
        - model_id
      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -6377,6 +6112,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -9119,61 +8863,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
    ParamType:
      oneOf:
        - $ref: '#/components/schemas/StringType'
        - $ref: '#/components/schemas/NumberType'
        - $ref: '#/components/schemas/BooleanType'
        - $ref: '#/components/schemas/ArrayType'
        - $ref: '#/components/schemas/ObjectType'
        - $ref: '#/components/schemas/JsonType'
        - $ref: '#/components/schemas/UnionType'
        - $ref: '#/components/schemas/ChatCompletionInputType'
        - $ref: '#/components/schemas/CompletionInputType'
      discriminator:
        propertyName: type
        mapping:
          string: '#/components/schemas/StringType'
          number: '#/components/schemas/NumberType'
          boolean: '#/components/schemas/BooleanType'
          array: '#/components/schemas/ArrayType'
          object: '#/components/schemas/ObjectType'
          json: '#/components/schemas/JsonType'
          union: '#/components/schemas/UnionType'
          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
          completion_input: '#/components/schemas/CompletionInputType'
    RegisterScoringFunctionRequest:
      type: object
      properties:
        scoring_fn_id:
          type: string
          description: >-
            The ID of the scoring function to register.
        description:
          type: string
          description: The description of the scoring function.
        return_type:
          $ref: '#/components/schemas/ParamType'
          description: The return type of the scoring function.
        provider_scoring_fn_id:
          type: string
          description: >-
            The ID of the provider scoring function to use for the scoring function.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the scoring function.
        params:
          $ref: '#/components/schemas/ScoringFnParams'
          description: >-
            The parameters for the scoring function for benchmark eval, these can
            be overridden for app eval.
      additionalProperties: false
      required:
        - scoring_fn_id
        - description
        - return_type
      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -9349,35 +9038,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
    RegisterShieldRequest:
      type: object
      properties:
        shield_id:
          type: string
          description: >-
            The identifier of the shield to register.
        provider_shield_id:
          type: string
          description: >-
            The identifier of the shield in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        params:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The parameters of the shield.
      additionalProperties: false
      required:
        - shield_id
      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -9638,37 +9298,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
    RegisterToolGroupRequest:
      type: object
      properties:
        toolgroup_id:
          type: string
          description: The ID of the tool group to register.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the tool group.
        mcp_endpoint:
          $ref: '#/components/schemas/URL'
          description: >-
            The MCP endpoint to use for the tool group.
        args:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            A dictionary of arguments to pass to the tool group.
      additionalProperties: false
      required:
        - toolgroup_id
        - provider_id
      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -10469,41 +10098,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
+          const: vector_store.file_content.page
-        filename:
+          default: vector_store.file_content.page
          type: string
          description: Name of the file
        attributes:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
-            Key-value attributes associated with the file
+            The object type, which is always `vector_store.file_content.page`
-        content:
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
        has_more:
          type: boolean
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
          type: string
          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
+        - object
-        - filename
+        - data
-        - attributes
+        - has_more
-        - content
+      title: VectorStoreFileContentResponse
      title: VectorStoreFileContentsResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
@ -10820,68 +10443,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -10949,47 +10510,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
    BenchmarkConfig:
      type: object
      properties:
@ -11851,6 +11371,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/docs/docs/deploying/kubernetes_deployment.mdx
+++ b/docs/docs/deploying/kubernetes_deployment.mdx
@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
 # Kubernetes Deployment Guide
-Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS.
+Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually.
 ## Prerequisites
@ -110,115 +110,176 @@ spec:
 EOF
 ```
-### Step 3: Configure Llama Stack
+### Step 3: Install Kubernetes Operator
-Update your run configuration:
+Install the Llama Stack Kubernetes operator to manage Llama Stack deployments:
 ```yaml
 providers:
  inference:
  - provider_id: vllm
    provider_type: remote::vllm
    config:
      url: http://vllm-server.default.svc.cluster.local:8000/v1
      max_tokens: 4096
      api_token: fake
 ```
 Build container image:
 ```bash
-tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <<EOF
+# Install from the latest main branch
-FROM distribution-myenv:dev
+kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml
-RUN apt-get update && apt-get install -y git
+
-RUN git clone https://github.com/meta-llama/llama-stack.git /app/llama-stack-source
+# Or install a specific version (e.g., v0.4.0)
-ADD ./vllm-llama-stack-run-k8s.yaml /app/config.yaml
+# kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/v0.4.0/release/operator.yaml
 EOF
 podman build -f $tmp_dir/Containerfile.llama-stack-run-k8s -t llama-stack-run-k8s $tmp_dir
 ```
-### Step 4: Deploy Llama Stack Server
+Verify the operator is running:
 ```bash
 kubectl get pods -n llama-stack-operator-system
 ```
 For more information about the operator, see the [llama-stack-k8s-operator repository](https://github.com/llamastack/llama-stack-k8s-operator).
 ### Step 4: Deploy Llama Stack Server using Operator
 Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
 You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).
 ```yaml
 cat <<EOF | kubectl apply -f -
-apiVersion: v1
+apiVersion: llamastack.io/v1alpha1
-kind: PersistentVolumeClaim
+kind: LlamaStackDistribution
 metadata:
-  name: llama-pvc
+  name: llamastack-vllm
 spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-stack-server
 spec:
  replicas: 1
-  selector:
+  server:
-    matchLabels:
+    distribution:
-      app.kubernetes.io/name: llama-stack
+      name: starter
-  template:
+    containerSpec:
-    metadata:
+      port: 8321
-      labels:
+      env:
-        app.kubernetes.io/name: llama-stack
+      - name: VLLM_URL
-    spec:
+        value: "http://vllm-server.default.svc.cluster.local:8000/v1"
-      containers:
+      - name: VLLM_MAX_TOKENS
-      - name: llama-stack
+        value: "4096"
-        image: localhost/llama-stack-run-k8s:latest
+      - name: VLLM_API_TOKEN
-        imagePullPolicy: IfNotPresent
+        value: "fake"
-        command: ["llama", "stack", "run", "/app/config.yaml"]
+    # Optional: override run.yaml from a ConfigMap using userConfig
-        ports:
+    userConfig:
-          - containerPort: 5000
+      configMap:
-        volumeMounts:
+        name: llama-stack-config
-          - name: llama-storage
+    storage:
-            mountPath: /root/.llama
+      size: "20Gi"
-      volumes:
+      mountPath: "/home/lls/.lls"
      - name: llama-storage
        persistentVolumeClaim:
          claimName: llama-pvc
 ---
 apiVersion: v1
 kind: Service
 metadata:
  name: llama-stack-service
 spec:
  selector:
    app.kubernetes.io/name: llama-stack
  ports:
  - protocol: TCP
    port: 5000
    targetPort: 5000
  type: ClusterIP
 EOF
 ```
 **Configuration Options:**
 - `replicas`: Number of Llama Stack server instances to run
 - `server.distribution.name`: The distribution to use (e.g., `starter` for the starter distribution). See the [list of supported distributions](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository.
 - `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
 - `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
 - `server.containerSpec.env`: Environment variables to configure providers (e.g., `VLLM_URL`, `VLLM_MAX_TOKENS`, and `VLLM_API_TOKEN` in the example above)
 - `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
 - `server.storage.size`: Size of the persistent volume for model and data storage
 - `server.storage.mountPath`: Where to mount the storage in the container
 **Note:** For a complete list of supported distributions, see [distributions.json](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/distributions.json) in the operator repository. To use a custom or non-supported distribution, set the `server.distribution.image` field with your container image instead of `server.distribution.name`.
 The operator automatically creates:
 - A Deployment for the Llama Stack server
 - A Service to access the server
 - A PersistentVolumeClaim for storage
 - All necessary RBAC resources
 Check the status of your deployment:
 ```bash
 kubectl get llamastackdistribution
 kubectl describe llamastackdistribution llamastack-vllm
 ```
 ### Step 5: Test Deployment
 Wait for the Llama Stack server pod to be ready:
 ```bash
-# Port forward and test
+# Check the status of the LlamaStackDistribution
-kubectl port-forward service/llama-stack-service 5000:5000
+kubectl get llamastackdistribution llamastack-vllm
-llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
+
 # Check the pods created by the operator
 kubectl get pods -l app.kubernetes.io/name=llama-stack
 # Wait for the pod to be ready
 kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=llama-stack --timeout=300s
 ```
 Get the service name created by the operator (it typically follows the pattern `<llamastackdistribution-name>-service`):
 ```bash
 # List services to find the service name
 kubectl get services | grep llamastack
 # Port forward and test (replace llamastack-vllm-service with the actual service name if it differs)
 kubectl port-forward service/llamastack-vllm-service 8321:8321
 ```
 In another terminal, test the deployment:
 ```bash
 llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?"
 ```
 ## Troubleshooting
-**Check pod status:**
+### vLLM Server Issues
 **Check vLLM pod status:**
 ```bash
 kubectl get pods -l app.kubernetes.io/name=vllm
 kubectl logs -l app.kubernetes.io/name=vllm
 ```
-**Test service connectivity:**
+**Test vLLM service connectivity:**
 ```bash
 kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models
 ```
 ### Llama Stack Server Issues
 **Check LlamaStackDistribution status:**
 ```bash
 # Get detailed status
 kubectl describe llamastackdistribution llamastack-vllm
 # Check for events
 kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm
 ```
 **Check operator-managed pods:**
 ```bash
 # List all pods managed by the operator
 kubectl get pods -l app.kubernetes.io/name=llama-stack
 # Check pod logs (selects pods by label; use `kubectl logs POD_NAME` to target a single pod)
 kubectl logs -l app.kubernetes.io/name=llama-stack
 ```
 **Check operator status:**
 ```bash
 # Verify the operator is running
 kubectl get pods -n llama-stack-operator-system
 # Check operator logs if issues persist
 kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager
 ```
 **Verify service connectivity:**
 ```bash
 # Get the service endpoint
 kubectl get svc llamastack-vllm-service
 # Test connectivity from within the cluster
 kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health
 ```
 ## Related Resources
 - **[Deployment Overview](/docs/deploying/)** - Overview of deployment options
 - **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions
 - **[Configuration](/docs/distributions/configuration)** - Detailed configuration options
 - **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of the llama-stack Kubernetes operator
 - **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API Spec of the llama-stack operator Custom Resource.
--- a/docs/docs/distributions/remote_hosted_distro/oci.md
+++ b/docs/docs/distributions/remote_hosted_distro/oci.md
@ -0,0 +1,143 @@
 ---
 orphan: true
 ---
 <!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # OCI Distribution
 The `llamastack/distribution-oci` distribution consists of the following provider configurations.
 | API | Provider(s) |
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | files | `inline::localfs` |
 | inference | `remote::oci` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 ### Environment Variables
 The following environment variables can be configured:
 - `OCI_AUTH_TYPE`: OCI authentication type (instance_principal or config_file) (default: `instance_principal`)
 - `OCI_REGION`: OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1) (default: ``)
 - `OCI_COMPARTMENT_OCID`: OCI compartment ID for the Generative AI service (default: ``)
 - `OCI_CONFIG_FILE_PATH`: OCI config file path (required if OCI_AUTH_TYPE is config_file) (default: `~/.oci/config`)
 - `OCI_CLI_PROFILE`: OCI CLI profile name to use from config file (default: `DEFAULT`)
 ## Prerequisites
 ### Oracle Cloud Infrastructure Setup
 Before using the OCI Generative AI distribution, ensure you have:
 1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
 2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
 3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
 4. **Authentication**: Configure authentication using either:
   - **Instance Principal** (recommended for cloud-hosted deployments)
   - **API Key** (for on-premises or development environments)
 ### Authentication Methods
 #### Instance Principal Authentication (Recommended)
 Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
 Requirements:
 - Instance must be running in an Oracle Cloud Infrastructure compartment
 - Instance must have appropriate IAM policies to access Generative AI services
 #### API Key Authentication
 For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
 ### Required IAM Policies
 Ensure your OCI user or instance has the following policy statements:
 ```
 Allow group <group_name> to use generative-ai-inference-endpoints in compartment <compartment_name>
 Allow group <group_name> to manage generative-ai-inference-endpoints in compartment <compartment_name>
 ```
 ## Supported Services
 ### Inference: OCI Generative AI
 Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
 - **Chat Completions**: Conversational AI with context awareness
 - **Text Generation**: Complete prompts and generate text content
 #### Available Models
 OCI Generative AI provides access to models from Meta, Cohere, OpenAI, Grok, and more.
 ### Safety: Llama Guard
 For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
 - Content filtering and moderation
 - Policy compliance checking
 - Harmful content detection
 ### Vector Storage: Multiple Options
 The distribution supports several vector storage providers:
 - **FAISS**: Local in-memory vector search
 - **ChromaDB**: Distributed vector database
 - **PGVector**: PostgreSQL with vector extensions
 ### Additional Services
 - **Dataset I/O**: Local filesystem and Hugging Face integration
 - **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
 - **Evaluation**: Meta reference evaluation framework
 ## Running Llama Stack with OCI
 You can run the OCI distribution via Docker or local virtual environment.
 ### Via venv
 If you've set up your local development environment, you can also run the distribution using your local virtual environment.
 ```bash
 OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
 ```
 ### Configuration Examples
 #### Using Instance Principal (Recommended for Production)
 ```bash
 export OCI_AUTH_TYPE=instance_principal
 export OCI_REGION=us-chicago-1
 export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..<your-compartment-id>
 ```
 #### Using API Key Authentication (Development)
 ```bash
 export OCI_AUTH_TYPE=config_file
 export OCI_CONFIG_FILE_PATH=~/.oci/config
 export OCI_CLI_PROFILE=DEFAULT
 export OCI_REGION=us-chicago-1
 export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
 ```
 ## Regional Endpoints
 OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
 https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
 ## Troubleshooting
 ### Common Issues
 1. **Authentication Errors**: Verify your OCI credentials and IAM policies
 2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
 3. **Permission Denied**: Check compartment permissions and Generative AI service access
 4. **Region Unavailable**: Verify the specified region supports Generative AI services
 ### Getting Help
 For additional support:
 - [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
 - [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
--- a/docs/docs/providers/inference/remote_oci.mdx
+++ b/docs/docs/providers/inference/remote_oci.mdx
@ -0,0 +1,41 @@
 ---
 description: |
  Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
  Provider documentation
  https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
 sidebar_label: Remote - Oci
 title: remote::oci
 ---
 # remote::oci
 ## Description
 Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
 Provider documentation
 https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
 ## Configuration
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
 | `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
 | `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
 | `oci_compartment_id` | `<class 'str'>` | No |  | OCI compartment ID for the Generative AI service |
 | `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
 | `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
 ## Sample Configuration
 ```yaml
 oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
 oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
 oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
 oci_region: ${env.OCI_REGION:=us-ashburn-1}
 oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
 ```
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -162,7 +162,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -219,7 +219,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -270,7 +270,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -327,7 +327,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -936,68 +936,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -1065,47 +1003,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
    AggregationFunctionType:
      type: string
      enum:
@ -2254,6 +2151,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -995,39 +995,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Model.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Model'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Register model.
      description: >-
        Register model.
        Register a model.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterModelRequest'
        required: true
      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1062,36 +1029,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Unregister model.
      description: >-
        Unregister model.
        Unregister a model.
      parameters:
        - name: model_id
          in: path
          description: >-
            The identifier of the model to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1722,32 +1659,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Register a scoring function.
      description: Register a scoring function.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
        required: true
      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1779,33 +1690,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Unregister a scoring function.
      description: Unregister a scoring function.
      parameters:
        - name: scoring_fn_id
          in: path
          description: >-
            The ID of the scoring function to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1894,36 +1778,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Shield.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Shield'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Register a shield.
      description: Register a shield.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterShieldRequest'
        required: true
      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1955,33 +1809,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Unregister a shield.
      description: Unregister a shield.
      parameters:
        - name: identifier
          in: path
          description: >-
            The identifier of the shield to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2077,32 +1904,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Register a tool group.
      description: Register a tool group.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterToolGroupRequest'
        required: true
      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2134,32 +1935,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Unregister a tool group.
      description: Unregister a tool group.
      parameters:
        - name: toolgroup_id
          in: path
          description: The ID of the tool group to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2913,11 +2688,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -5564,46 +5339,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RegisterModelRequest:
      type: object
      properties:
        model_id:
          type: string
          description: The identifier of the model to register.
        provider_model_id:
          type: string
          description: >-
            The identifier of the model in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: Any additional metadata for this model.
        model_type:
          $ref: '#/components/schemas/ModelType'
          description: The type of model to register.
      additionalProperties: false
      required:
        - model_id
      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -5661,6 +5396,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -8403,61 +8147,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
    ParamType:
      oneOf:
        - $ref: '#/components/schemas/StringType'
        - $ref: '#/components/schemas/NumberType'
        - $ref: '#/components/schemas/BooleanType'
        - $ref: '#/components/schemas/ArrayType'
        - $ref: '#/components/schemas/ObjectType'
        - $ref: '#/components/schemas/JsonType'
        - $ref: '#/components/schemas/UnionType'
        - $ref: '#/components/schemas/ChatCompletionInputType'
        - $ref: '#/components/schemas/CompletionInputType'
      discriminator:
        propertyName: type
        mapping:
          string: '#/components/schemas/StringType'
          number: '#/components/schemas/NumberType'
          boolean: '#/components/schemas/BooleanType'
          array: '#/components/schemas/ArrayType'
          object: '#/components/schemas/ObjectType'
          json: '#/components/schemas/JsonType'
          union: '#/components/schemas/UnionType'
          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
          completion_input: '#/components/schemas/CompletionInputType'
    RegisterScoringFunctionRequest:
      type: object
      properties:
        scoring_fn_id:
          type: string
          description: >-
            The ID of the scoring function to register.
        description:
          type: string
          description: The description of the scoring function.
        return_type:
          $ref: '#/components/schemas/ParamType'
          description: The return type of the scoring function.
        provider_scoring_fn_id:
          type: string
          description: >-
            The ID of the provider scoring function to use for the scoring function.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the scoring function.
        params:
          $ref: '#/components/schemas/ScoringFnParams'
          description: >-
            The parameters for the scoring function for benchmark eval, these can
            be overridden for app eval.
      additionalProperties: false
      required:
        - scoring_fn_id
        - description
        - return_type
      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -8633,35 +8322,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
    RegisterShieldRequest:
      type: object
      properties:
        shield_id:
          type: string
          description: >-
            The identifier of the shield to register.
        provider_shield_id:
          type: string
          description: >-
            The identifier of the shield in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        params:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The parameters of the shield.
      additionalProperties: false
      required:
        - shield_id
      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -8922,37 +8582,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
    RegisterToolGroupRequest:
      type: object
      properties:
        toolgroup_id:
          type: string
          description: The ID of the tool group to register.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the tool group.
        mcp_endpoint:
          $ref: '#/components/schemas/URL'
          description: >-
            The MCP endpoint to use for the tool group.
        args:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            A dictionary of arguments to pass to the tool group.
      additionalProperties: false
      required:
        - toolgroup_id
        - provider_id
      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -9753,41 +9382,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
+          const: vector_store.file_content.page
-        filename:
+          default: vector_store.file_content.page
          type: string
          description: Name of the file
        attributes:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
-            Key-value attributes associated with the file
+            The object type, which is always `vector_store.file_content.page`
-        content:
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
        has_more:
          type: boolean
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
          type: string
          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
+        - object
-        - filename
+        - data
-        - attributes
+        - has_more
-        - content
+      title: VectorStoreFileContentResponse
      title: VectorStoreFileContentsResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -998,39 +998,6 @@ paths:
      description: List models using the OpenAI API.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Model.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Model'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Register model.
      description: >-
        Register model.
        Register a model.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterModelRequest'
        required: true
      deprecated: false
  /v1/models/{model_id}:
    get:
      responses:
@ -1065,36 +1032,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Models
      summary: Unregister model.
      description: >-
        Unregister model.
        Unregister a model.
      parameters:
        - name: model_id
          in: path
          description: >-
            The identifier of the model to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/moderations:
    post:
      responses:
@ -1725,32 +1662,6 @@ paths:
      description: List all scoring functions.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Register a scoring function.
      description: Register a scoring function.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
        required: true
      deprecated: false
  /v1/scoring-functions/{scoring_fn_id}:
    get:
      responses:
@ -1782,33 +1693,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ScoringFunctions
      summary: Unregister a scoring function.
      description: Unregister a scoring function.
      parameters:
        - name: scoring_fn_id
          in: path
          description: >-
            The ID of the scoring function to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/scoring/score:
    post:
      responses:
@ -1897,36 +1781,6 @@ paths:
      description: List all shields.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: A Shield.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Shield'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Register a shield.
      description: Register a shield.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterShieldRequest'
        required: true
      deprecated: false
  /v1/shields/{identifier}:
    get:
      responses:
@ -1958,33 +1812,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Shields
      summary: Unregister a shield.
      description: Unregister a shield.
      parameters:
        - name: identifier
          in: path
          description: >-
            The identifier of the shield to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tool-runtime/invoke:
    post:
      responses:
@ -2080,32 +1907,6 @@ paths:
      description: List tool groups with optional provider.
      parameters: []
      deprecated: false
    post:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Register a tool group.
      description: Register a tool group.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterToolGroupRequest'
        required: true
      deprecated: false
  /v1/toolgroups/{toolgroup_id}:
    get:
      responses:
@ -2137,32 +1938,6 @@ paths:
          schema:
            type: string
      deprecated: false
    delete:
      responses:
        '200':
          description: OK
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - ToolGroups
      summary: Unregister a tool group.
      description: Unregister a tool group.
      parameters:
        - name: toolgroup_id
          in: path
          description: The ID of the tool group to unregister.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/tools:
    get:
      responses:
@ -2916,11 +2691,11 @@ paths:
      responses:
        '200':
          description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -3171,7 +2946,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterDatasetRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
@ -3228,7 +3003,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks:
    get:
      responses:
@ -3279,7 +3054,7 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterBenchmarkRequest'
        required: true
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
@ -3336,7 +3111,7 @@ paths:
          required: true
          schema:
            type: string
-      deprecated: false
+      deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
@ -6280,46 +6055,6 @@ components:
      required:
        - data
      title: OpenAIListModelsResponse
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RegisterModelRequest:
      type: object
      properties:
        model_id:
          type: string
          description: The identifier of the model to register.
        provider_model_id:
          type: string
          description: >-
            The identifier of the model in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: Any additional metadata for this model.
        model_type:
          $ref: '#/components/schemas/ModelType'
          description: The type of model to register.
      additionalProperties: false
      required:
        - model_id
      title: RegisterModelRequest
    Model:
      type: object
      properties:
@ -6377,6 +6112,15 @@ components:
      title: Model
      description: >-
        A model resource representing an AI model registered in Llama Stack.
    ModelType:
      type: string
      enum:
        - llm
        - embedding
        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
    RunModerationRequest:
      type: object
      properties:
@ -9119,61 +8863,6 @@ components:
      required:
        - data
      title: ListScoringFunctionsResponse
    ParamType:
      oneOf:
        - $ref: '#/components/schemas/StringType'
        - $ref: '#/components/schemas/NumberType'
        - $ref: '#/components/schemas/BooleanType'
        - $ref: '#/components/schemas/ArrayType'
        - $ref: '#/components/schemas/ObjectType'
        - $ref: '#/components/schemas/JsonType'
        - $ref: '#/components/schemas/UnionType'
        - $ref: '#/components/schemas/ChatCompletionInputType'
        - $ref: '#/components/schemas/CompletionInputType'
      discriminator:
        propertyName: type
        mapping:
          string: '#/components/schemas/StringType'
          number: '#/components/schemas/NumberType'
          boolean: '#/components/schemas/BooleanType'
          array: '#/components/schemas/ArrayType'
          object: '#/components/schemas/ObjectType'
          json: '#/components/schemas/JsonType'
          union: '#/components/schemas/UnionType'
          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
          completion_input: '#/components/schemas/CompletionInputType'
    RegisterScoringFunctionRequest:
      type: object
      properties:
        scoring_fn_id:
          type: string
          description: >-
            The ID of the scoring function to register.
        description:
          type: string
          description: The description of the scoring function.
        return_type:
          $ref: '#/components/schemas/ParamType'
          description: The return type of the scoring function.
        provider_scoring_fn_id:
          type: string
          description: >-
            The ID of the provider scoring function to use for the scoring function.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the scoring function.
        params:
          $ref: '#/components/schemas/ScoringFnParams'
          description: >-
            The parameters for the scoring function for benchmark eval, these can
            be overridden for app eval.
      additionalProperties: false
      required:
        - scoring_fn_id
        - description
        - return_type
      title: RegisterScoringFunctionRequest
    ScoreRequest:
      type: object
      properties:
@ -9349,35 +9038,6 @@ components:
      required:
        - data
      title: ListShieldsResponse
    RegisterShieldRequest:
      type: object
      properties:
        shield_id:
          type: string
          description: >-
            The identifier of the shield to register.
        provider_shield_id:
          type: string
          description: >-
            The identifier of the shield in the provider.
        provider_id:
          type: string
          description: The identifier of the provider.
        params:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The parameters of the shield.
      additionalProperties: false
      required:
        - shield_id
      title: RegisterShieldRequest
    InvokeToolRequest:
      type: object
      properties:
@ -9638,37 +9298,6 @@ components:
      title: ListToolGroupsResponse
      description: >-
        Response containing a list of tool groups.
    RegisterToolGroupRequest:
      type: object
      properties:
        toolgroup_id:
          type: string
          description: The ID of the tool group to register.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the tool group.
        mcp_endpoint:
          $ref: '#/components/schemas/URL'
          description: >-
            The MCP endpoint to use for the tool group.
        args:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            A dictionary of arguments to pass to the tool group.
      additionalProperties: false
      required:
        - toolgroup_id
        - provider_id
      title: RegisterToolGroupRequest
    Chunk:
      type: object
      properties:
@ -10469,41 +10098,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
+          const: vector_store.file_content.page
-        filename:
+          default: vector_store.file_content.page
          type: string
          description: Name of the file
        attributes:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
-            Key-value attributes associated with the file
+            The object type, which is always `vector_store.file_content.page`
-        content:
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
        has_more:
          type: boolean
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
          type: string
          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
+        - object
-        - filename
+        - data
-        - attributes
+        - has_more
-        - content
+      title: VectorStoreFileContentResponse
      title: VectorStoreFileContentsResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
    OpenaiSearchVectorStoreRequest:
      type: object
      properties:
@ -10820,68 +10443,6 @@ components:
        - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    Benchmark:
      type: object
      properties:
@ -10949,47 +10510,6 @@ components:
      required:
        - data
      title: ListBenchmarksResponse
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
    BenchmarkConfig:
      type: object
      properties:
@ -11851,6 +11371,109 @@ components:
        - hyperparam_search_config
        - logger_config
      title: SupervisedFineTuneRequest
    DataSource:
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    RegisterDatasetRequest:
      type: object
      properties:
        purpose:
          type: string
          enum:
            - post-training/messages
            - eval/question-answer
            - eval/messages-answer
          description: >-
            The purpose of the dataset. One of: - "post-training/messages": The dataset
            contains a messages column with list of messages for post-training. {
            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
            contains a question column and an answer column for evaluation. { "question":
            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
            The dataset contains a messages column with list of messages and an answer
            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
            Doe. How can I help you today?"}, {"role": "user", "content": "What's
            my name?"}, ], "answer": "John Doe" }
        source:
          $ref: '#/components/schemas/DataSource'
          description: >-
            The data source of the dataset. Ensure that the data source schema is
            compatible with the purpose of the dataset. Examples: - { "type": "uri",
            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
            } ] }
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            The metadata for the dataset. - E.g. {"description": "My dataset"}.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset. If not provided, an ID will be generated.
      additionalProperties: false
      required:
        - purpose
        - source
      title: RegisterDatasetRequest
    RegisterBenchmarkRequest:
      type: object
      properties:
        benchmark_id:
          type: string
          description: The ID of the benchmark to register.
        dataset_id:
          type: string
          description: >-
            The ID of the dataset to use for the benchmark.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the benchmark.
        provider_benchmark_id:
          type: string
          description: >-
            The ID of the provider benchmark to use for the benchmark.
        provider_id:
          type: string
          description: >-
            The ID of the provider to use for the benchmark.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: The metadata to use for the benchmark.
      additionalProperties: false
      required:
        - benchmark_id
        - dataset_id
        - scoring_functions
      title: RegisterBenchmarkRequest
  responses:
    BadRequest400:
      description: The request was invalid or malformed
--- a/pyproject.toml
+++ b/pyproject.toml
@ -298,6 +298,7 @@ exclude = [
    "^src/llama_stack/providers/remote/agents/sample/",
    "^src/llama_stack/providers/remote/datasetio/huggingface/",
    "^src/llama_stack/providers/remote/datasetio/nvidia/",
    "^src/llama_stack/providers/remote/inference/oci/",
    "^src/llama_stack/providers/remote/inference/bedrock/",
    "^src/llama_stack/providers/remote/inference/nvidia/",
    "^src/llama_stack/providers/remote/inference/passthrough/",
--- a/src/llama_stack/apis/benchmarks/benchmarks.py
+++ b/src/llama_stack/apis/benchmarks/benchmarks.py
@ -74,7 +74,7 @@ class Benchmarks(Protocol):
        """
        ...
-    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def register_benchmark(
        self,
        benchmark_id: str,
@ -95,7 +95,7 @@ class Benchmarks(Protocol):
        """
        ...
-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
    async def unregister_benchmark(self, benchmark_id: str) -> None:
        """Unregister a benchmark.
--- a/src/llama_stack/apis/datasets/datasets.py
+++ b/src/llama_stack/apis/datasets/datasets.py
@ -146,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def register_dataset(
        self,
        purpose: DatasetPurpose,
@ -235,7 +235,7 @@ class Datasets(Protocol):
        """
        ...
-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
    async def unregister_dataset(
        self,
        dataset_id: str,
--- a/src/llama_stack/apis/models/models.py
+++ b/src/llama_stack/apis/models/models.py
@ -136,7 +136,7 @@ class Models(Protocol):
        """
        ...
-    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_model(
        self,
        model_id: str,
@ -158,7 +158,7 @@ class Models(Protocol):
        """
        ...
-    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_model(
        self,
        model_id: str,
--- a/src/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/src/llama_stack/apis/scoring_functions/scoring_functions.py
@ -178,7 +178,7 @@ class ScoringFunctions(Protocol):
        """
        ...
-    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_scoring_function(
        self,
        scoring_fn_id: str,
@ -199,7 +199,9 @@ class ScoringFunctions(Protocol):
        """
        ...
-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(
        route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
    )
    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
        """Unregister a scoring function.
--- a/src/llama_stack/apis/shields/shields.py
+++ b/src/llama_stack/apis/shields/shields.py
@ -67,7 +67,7 @@ class Shields(Protocol):
        """
        ...
-    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_shield(
        self,
        shield_id: str,
@ -85,7 +85,7 @@ class Shields(Protocol):
        """
        ...
-    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_shield(self, identifier: str) -> None:
        """Unregister a shield.
--- a/src/llama_stack/apis/tools/tools.py
+++ b/src/llama_stack/apis/tools/tools.py
@ -109,7 +109,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable
@telemetry_traceable
 class ToolGroups(Protocol):
-    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    async def register_tool_group(
        self,
        toolgroup_id: str,
@ -167,7 +167,7 @@ class ToolGroups(Protocol):
        """
        ...
-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
    async def unregister_toolgroup(
        self,
        toolgroup_id: str,
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
@json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
+class VectorStoreFileContentResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
+    """Represents the parsed content of a vector store file.
-    :param file_id: Unique identifier for the file
+    :param object: The object type, which is always `vector_store.file_content.page`
-    :param filename: Name of the file
+    :param data: Parsed content of the file
-    :param attributes: Key-value attributes associated with the file
+    :param has_more: Indicates if there are more content pages to fetch
-    :param content: List of content items from the file
+    :param next_page: The token for the next page, if any
    """
-    file_id: str
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
-    filename: str
+    data: list[VectorStoreContent]
-    attributes: dict[str, Any]
+    has_more: bool
-    content: list[VectorStoreContent]
+    next_page: str | None = None
@json_schema_type
@ -732,12 +732,12 @@ class VectorIO(Protocol):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file.
        :param vector_store_id: The ID of the vector store containing the file to retrieve.
        :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :returns: A VectorStoreFileContentResponse representing the file contents.
        """
        ...
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreChunkingStrategyStaticConfig,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,
@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
        provider = await self.routing_table.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store_file_contents(
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
    SearchRankingOptions,
    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFileStatus,
@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        await self.assert_action_allowed("read", "vector_store", vector_store_id)
        provider = await self.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store_file_contents(
--- a/src/llama_stack/distributions/oci/init.py
+++ b/src/llama_stack/distributions/oci/init.py
@ -0,0 +1,7 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .oci import get_distribution_template  # noqa: F401
--- a/src/llama_stack/distributions/oci/build.yaml
+++ b/src/llama_stack/distributions/oci/build.yaml
@ -0,0 +1,35 @@
 version: 2
 distribution_spec:
  description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM
    inference with scalable cloud services
  providers:
    inference:
    - provider_type: remote::oci
    vector_io:
    - provider_type: inline::faiss
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    safety:
    - provider_type: inline::llama-guard
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
    files:
    - provider_type: inline::localfs
 image_type: venv
 additional_pip_packages:
 - aiosqlite
 - sqlalchemy[asyncio]
--- a/src/llama_stack/distributions/oci/doc_template.md
+++ b/src/llama_stack/distributions/oci/doc_template.md
@ -0,0 +1,140 @@
 ---
 orphan: true
 ---
 # OCI Distribution
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
 {{ providers_table }}
 {% if run_config_env_vars %}
 ### Environment Variables
 The following environment variables can be configured:
 {% for var, (default_value, description) in run_config_env_vars.items() %}
 - `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
 {% endfor %}
 {% endif %}
 {% if default_models %}
 ### Models
 The following models are available by default:
 {% for model in default_models %}
 - `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 ## Prerequisites
 ### Oracle Cloud Infrastructure Setup
 Before using the OCI Generative AI distribution, ensure you have:
 1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
 2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
 3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
 4. **Authentication**: Configure authentication using either:
   - **Instance Principal** (recommended for cloud-hosted deployments)
   - **API Key** (for on-premises or development environments)
 ### Authentication Methods
 #### Instance Principal Authentication (Recommended)
 Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
 Requirements:
 - Instance must be running in an Oracle Cloud Infrastructure compartment
 - Instance must have appropriate IAM policies to access Generative AI services
 #### API Key Authentication
 For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
 ### Required IAM Policies
 Ensure your OCI user or instance has the following policy statements:
 ```
 Allow group <group_name> to use generative-ai-inference-endpoints in compartment <compartment_name>
 Allow group <group_name> to manage generative-ai-inference-endpoints in compartment <compartment_name>
 ```
 ## Supported Services
 ### Inference: OCI Generative AI
 Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
 - **Chat Completions**: Conversational AI with context awareness
 - **Text Generation**: Complete prompts and generate text content
 #### Available Models
 OCI Generative AI provides access to models from Meta, Cohere, OpenAI, Grok, and more.
 ### Safety: Llama Guard
 For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
 - Content filtering and moderation
 - Policy compliance checking
 - Harmful content detection
 ### Vector Storage: Multiple Options
 The distribution supports several vector storage providers:
 - **FAISS**: Local in-memory vector search
 - **ChromaDB**: Distributed vector database
 - **PGVector**: PostgreSQL with vector extensions
 ### Additional Services
 - **Dataset I/O**: Local filesystem and Hugging Face integration
 - **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
 - **Evaluation**: Meta reference evaluation framework
 ## Running Llama Stack with OCI
 You can run the OCI distribution via Docker or local virtual environment.
 ### Via venv
 If you've set up your local development environment, you can also build the image using your local virtual environment.
 ```bash
 OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
 ```
 ### Configuration Examples
 #### Using Instance Principal (Recommended for Production)
 ```bash
 export OCI_AUTH_TYPE=instance_principal
 export OCI_REGION=us-chicago-1
 export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
 ```
 #### Using API Key Authentication (Development)
 ```bash
 export OCI_AUTH_TYPE=config_file
 export OCI_CONFIG_FILE_PATH=~/.oci/config
 export OCI_CLI_PROFILE=DEFAULT
 export OCI_REGION=us-chicago-1
 export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
 ```
 ## Regional Endpoints
 OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
 https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
 ## Troubleshooting
 ### Common Issues
 1. **Authentication Errors**: Verify your OCI credentials and IAM policies
 2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
 3. **Permission Denied**: Check compartment permissions and Generative AI service access
 4. **Region Unavailable**: Verify the specified region supports Generative AI services
 ### Getting Help
 For additional support:
 - [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
 - [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
--- a/src/llama_stack/distributions/oci/oci.py
+++ b/src/llama_stack/distributions/oci/oci.py
@ -0,0 +1,108 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from pathlib import Path
 from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
 from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
 def get_distribution_template(name: str = "oci") -> DistributionTemplate:
    """Build the distribution template for the OCI Generative AI distribution.

    :param name: Distribution name. It is also interpolated into the
        `~/.llama/distributions/{name}` path handed to the FAISS and local-files
        sample run configs.
    :returns: A DistributionTemplate exposing a single `run.yaml` run config.
    """
    # Every provider that gets built into the distribution, keyed by API.
    providers = {
        "inference": [BuildProvider(provider_type="remote::oci")],
        "vector_io": [
            BuildProvider(provider_type="inline::faiss"),
            BuildProvider(provider_type="remote::chromadb"),
            BuildProvider(provider_type="remote::pgvector"),
        ],
        "safety": [BuildProvider(provider_type="inline::llama-guard")],
        "agents": [BuildProvider(provider_type="inline::meta-reference")],
        "eval": [BuildProvider(provider_type="inline::meta-reference")],
        "datasetio": [
            BuildProvider(provider_type="remote::huggingface"),
            BuildProvider(provider_type="inline::localfs"),
        ],
        "scoring": [
            BuildProvider(provider_type="inline::basic"),
            BuildProvider(provider_type="inline::llm-as-judge"),
            BuildProvider(provider_type="inline::braintrust"),
        ],
        "tool_runtime": [
            BuildProvider(provider_type="remote::brave-search"),
            BuildProvider(provider_type="remote::tavily-search"),
            BuildProvider(provider_type="inline::rag-runtime"),
            BuildProvider(provider_type="remote::model-context-protocol"),
        ],
        "files": [BuildProvider(provider_type="inline::localfs")],
    }

    # Explicit provider instances that override the defaults in run.yaml.
    inference_provider = Provider(
        provider_id="oci",
        provider_type="remote::oci",
        config=OCIConfig.sample_run_config(),
    )
    vector_io_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    files_provider = Provider(
        provider_id="meta-reference-files",
        provider_type="inline::localfs",
        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )

    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
    ]

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "vector_io": [vector_io_provider],
                    "files": [files_provider],
                },
                default_tool_groups=default_tool_groups,
            ),
        },
        # Each entry is (default_value, description); the doc template renders the
        # first element as the documented default for the variable.
        run_config_env_vars={
            "OCI_AUTH_TYPE": (
                "instance_principal",
                "OCI authentication type (instance_principal or config_file)",
            ),
            "OCI_REGION": (
                # Keep in sync with the region default used by OCIConfig.sample_run_config.
                "us-ashburn-1",
                "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)",
            ),
            "OCI_COMPARTMENT_OCID": (
                "",
                "OCI compartment ID for the Generative AI service",
            ),
            "OCI_CONFIG_FILE_PATH": (
                "~/.oci/config",
                "OCI config file path (required if OCI_AUTH_TYPE is config_file)",
            ),
            "OCI_CLI_PROFILE": (
                "DEFAULT",
                "OCI CLI profile name to use from config file",
            ),
        },
    )
--- a/src/llama_stack/distributions/oci/run.yaml
+++ b/src/llama_stack/distributions/oci/run.yaml
@ -0,0 +1,136 @@
 version: 2
 image_name: oci
 apis:
 - agents
 - datasetio
 - eval
 - files
 - inference
 - safety
 - scoring
 - tool_runtime
 - vector_io
 providers:
  inference:
  - provider_id: oci
    provider_type: remote::oci
    config:
      oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
      oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
      oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
      oci_region: ${env.OCI_REGION:=us-ashburn-1}
      oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      persistence:
        namespace: vector_io::faiss
        backend: kv_default
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence:
        agent_state:
          namespace: agents
          backend: kv_default
        responses:
          table_name: responses
          backend: sql_default
          max_write_queue_size: 10000
          num_writers: 4
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      kvstore:
        namespace: eval
        backend: kv_default
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        namespace: datasetio::huggingface
        backend: kv_default
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        namespace: datasetio::localfs
        backend: kv_default
  scoring:
  - provider_id: basic
    provider_type: inline::basic
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files}
      metadata_store:
        table_name: files_metadata
        backend: sql_default
 storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      table_name: openai_conversations
      backend: sql_default
    prompts:
      namespace: prompts
      backend: kv_default
 registered_resources:
  models: []
  shields: []
  vector_dbs: []
  datasets: []
  scoring_fns: []
  benchmarks: []
  tool_groups:
  - toolgroup_id: builtin::websearch
    provider_id: tavily-search
 server:
  port: 8321
 telemetry:
  enabled: true
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
    async def register_vector_store(self, vector_store: VectorStore) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
        return [i.vector_store for i in self.cache.values()]
    async def unregister_vector_store(self, vector_store_id: str) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
        if vector_store_id not in self.cache:
            return
@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
        del self.cache[vector_store_id]
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
            inference_api=self.inference_api,
        )
        self.cache[vector_store_id] = index
        return index
    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
        index = self.cache.get(vector_store_id)
        if index is None:
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
        return [v.vector_store for v in self.cache.values()]
    async def register_vector_store(self, vector_store: VectorStore) -> None:
        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
        # Save to kvstore for persistence
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
        # Create and cache the index
        index = await SQLiteVecIndex.create(
            vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
        )
@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        if self.vector_store_table is None:
+        # Try to load from kvstore
-            raise VectorStoreNotFoundError(vector_store_id)
+        if self.kvstore is None:
-
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
-        vector_store = self.vector_store_table.get_vector_store(vector_store_id)
+
-        if not vector_store:
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=SQLiteVecIndex(
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@ -297,6 +297,20 @@ Available Models:
 Azure OpenAI inference provider for accessing GPT models and other Azure services.
 Provider documentation
 https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 """,
        ),
        RemoteProviderSpec(
            api=Api.inference,
            provider_type="remote::oci",
            adapter_type="oci",
            pip_packages=["oci"],
            module="llama_stack.providers.remote.inference.oci",
            config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig",
            provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator",
            description="""
 Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
 Provider documentation
 https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
 """,
        ),
    ]
--- a/src/llama_stack/providers/remote/inference/oci/init.py
+++ b/src/llama_stack/providers/remote/inference/oci/init.py
@ -0,0 +1,17 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from llama_stack.apis.inference import InferenceProvider
 from .config import OCIConfig
 async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider:
    """Create and initialize the OCI inference adapter for the given config.

    :param config: OCI provider configuration.
    :param _deps: Unused by this provider.
    :returns: The adapter, after its `initialize()` has completed.
    """
    # The adapter module is imported only when an adapter is actually requested.
    from .oci import OCIInferenceAdapter

    impl = OCIInferenceAdapter(config=config)
    await impl.initialize()
    return impl
--- a/src/llama_stack/providers/remote/inference/oci/auth.py
+++ b/src/llama_stack/providers/remote/inference/oci/auth.py
@ -0,0 +1,79 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from collections.abc import Generator, Mapping
 from typing import Any, override
 import httpx
 import oci
 import requests
 from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
 OciAuthSigner = type[oci.signer.AbstractBaseSigner]
 class HttpxOciAuth(httpx.Auth):
    """
    Custom HTTPX authentication class that implements OCI request signing.

    This class handles the authentication flow for HTTPX requests by signing them
    using the OCI Signer, which adds the necessary authentication headers for
    OCI API calls.

    Attributes:
        signer (oci.signer.Signer): The OCI signer instance used for request signing
    """

    # The request body is handed to the signer, so ask httpx to load it before
    # auth_flow runs. httpx then uses the sync or async reader as appropriate,
    # which keeps streamed bodies working under an async client.
    requires_request_body = True

    def __init__(self, signer: OciAuthSigner):
        self.signer = signer

    @override
    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
        """Sign the outgoing request and yield it back to httpx.

        :param request: The HTTPX request about to be sent.
        :returns: A generator yielding the same request with the signed headers merged in.
        """
        # Normally the body is already loaded (see requires_request_body); the
        # fallback read is kept for callers that drive auth_flow directly.
        try:
            content = request.content
        except httpx.RequestNotRead:
            # For streaming requests, we need to read the content first
            content = request.read()

        # The signer is applied to a `requests` prepared request, so mirror the
        # HTTPX request into one.
        req = requests.Request(
            method=request.method,
            url=str(request.url),
            headers=dict(request.headers),
            data=content,
        )
        prepared_request = req.prepare()

        # Sign the request using the OCI Signer
        self.signer.do_request_sign(prepared_request)  # type: ignore

        # Update the original HTTPX request with the signed headers
        request.headers.update(prepared_request.headers)
        yield request
 class OciInstancePrincipalAuth(HttpxOciAuth):
    """HTTPX auth that signs requests with an instance-principal security token signer."""

    def __init__(self, **kwargs: Mapping[str, Any]):
        # All keyword arguments are forwarded unchanged to the OCI signer.
        instance_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs)
        self.signer = instance_signer
 class OciUserPrincipalAuth(HttpxOciAuth):
    """HTTPX auth that signs requests with the API signing key of an OCI config-file profile."""

    def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE):
        """Load the profile, validate it, and build the signer from its key material.

        :param config_file: Path to the OCI config file.
        :param profile_name: Name of the profile to read from the config file.
        """
        config = oci.config.from_file(config_file, profile_name)
        oci.config.validate_config(config)  # type: ignore
        with open(config["key_file"]) as f:
            key_content = f.read()
        self.signer = oci.signer.Signer(
            tenancy=config["tenancy"],
            user=config["user"],
            fingerprint=config["fingerprint"],
            private_key_file_location=config.get("key_file"),
            # Use the passphrase configured for the profile (absent for unencrypted
            # keys) instead of a hard-coded literal, so encrypted keys can be loaded.
            pass_phrase=config.get("pass_phrase"),  # type: ignore
            private_key_content=key_content,
        )
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@ -0,0 +1,75 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import os
 from typing import Any
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.schema_utils import json_schema_type
 class OCIProviderDataValidator(BaseModel):
    # Schema for OCI provider data: auth type, region and compartment are
    # required; the config-file path and profile have defaults.
    oci_auth_type: str = Field(description="OCI authentication type (must be one of: instance_principal, config_file)")
    oci_region: str = Field(description="OCI region (e.g., us-ashburn-1)")
    oci_compartment_id: str = Field(description="OCI compartment ID for the Generative AI service")
    oci_config_file_path: str | None = Field(
        description="OCI config file path (required if oci_auth_type is config_file)",
        default="~/.oci/config",
    )
    oci_config_profile: str | None = Field(
        description="OCI config profile (required if oci_auth_type is config_file)",
        default="DEFAULT",
    )
@json_schema_type
 class OCIConfig(RemoteInferenceProviderConfig):
    oci_auth_type: str = Field(
        description="OCI authentication type (must be one of: instance_principal, config_file)",
        default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"),
    )
    oci_region: str = Field(
        default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"),
        description="OCI region (e.g., us-ashburn-1)",
    )
    oci_compartment_id: str = Field(
        default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""),
        description="OCI compartment ID for the Generative AI service",
    )
    oci_config_file_path: str = Field(
        default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"),
        description="OCI config file path (required if oci_auth_type is config_file)",
    )
    oci_config_profile: str = Field(
        default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"),
        description="OCI config profile (required if oci_auth_type is config_file)",
    )
    @classmethod
    def sample_run_config(
        cls,
        oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}",
        oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}",
        oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}",
        oci_region: str = "${env.OCI_REGION:=us-ashburn-1}",
        oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}",
        **kwargs,
    ) -> dict[str, Any]:
        return {
            "oci_auth_type": oci_auth_type,
            "oci_config_file_path": oci_config_file_path,
            "oci_config_profile": oci_config_profile,
            "oci_region": oci_region,
            "oci_compartment_id": oci_compartment_id,
        }
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@ -0,0 +1,140 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from collections.abc import Iterable
 from typing import Any
 import httpx
 import oci
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
 from llama_stack.apis.inference.inference import (
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 # Module logger, tagged with the OCI inference category.
 logger = get_logger(name=__name__, category="inference::oci")
 # Accepted values for OCIConfig.oci_auth_type.
 OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
 OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
 # OCIInferenceAdapter.initialize() rejects any auth type not in this list.
 VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
 # Region used by get_base_url() when the configured region is empty.
 DEFAULT_OCI_REGION = "us-ashburn-1"
 # Capability filter passed to GenerativeAiClient.list_models().
 MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
 class OCIInferenceAdapter(OpenAIMixin):
    """Inference adapter for the OCI Generative AI service.

    Built on ``OpenAIMixin``: it supplies the base URL, a placeholder API key
    and an HTTP client carrying OCI authentication, and lists chat-capable
    models through the OCI SDK.
    """

    config: OCIConfig

    async def initialize(self) -> None:
        """Validate the OCI configuration.

        Raises:
            ValueError: if ``oci_auth_type`` is not one of
                ``VALID_OCI_AUTH_TYPES`` or ``oci_compartment_id`` is empty.
        """
        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
            # The trailing space keeps the two sentences from running together.
            raise ValueError(
                f"Invalid OCI authentication type: {self.config.oci_auth_type}. "
                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
            )
        if not self.config.oci_compartment_id:
            raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.")

    def get_base_url(self) -> str:
        """Return the regional inference endpoint, defaulting to ``DEFAULT_OCI_REGION``."""
        region = self.config.oci_region or DEFAULT_OCI_REGION
        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"

    def get_api_key(self) -> str | None:
        """Return a placeholder key."""
        # OCI doesn't use API keys, it uses request signing
        return "<NOTUSED>"

    def get_extra_client_params(self) -> dict[str, Any]:
        """
        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.

        Returns:
            A dict with a single ``http_client`` entry: an httpx client that
            uses the configured OCI auth and sends a ``CompartmentId`` header.
        """
        auth = self._get_auth()
        compartment_id = self.config.oci_compartment_id or ""
        return {
            "http_client": DefaultAsyncHttpxClient(
                auth=auth,
                headers={
                    "CompartmentId": compartment_id,
                },
            ),
        }

    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
        """Return an instance-principal signer, or ``None`` for any other auth type."""
        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        return None

    def _get_oci_config(self) -> dict:
        """Build the OCI SDK config dict for the configured auth type.

        Returns:
            ``{"region": ...}`` for instance-principal auth, or the profile
            loaded from the OCI config file for config-file auth.

        Raises:
            ValueError: if the loaded config file profile has no region, or if
                the auth type is not a supported one.
        """
        auth_type = self.config.oci_auth_type
        if auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            return {"region": self.config.oci_region}
        if auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
            config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
            if not config.get("region"):
                raise ValueError(
                    "Region not specified in config. Please specify in config or with OCI_REGION env variable."
                )
            return config
        # Fail with the same explicit error as _get_auth() instead of falling
        # through to an unbound local when initialize() was skipped.
        raise ValueError(f"Invalid OCI authentication type: {auth_type}")

    def _get_auth(self) -> httpx.Auth:
        """Return the httpx auth handler matching the configured auth type.

        Raises:
            ValueError: if the auth type is not a supported one.
        """
        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
            return OciInstancePrincipalAuth()
        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
            return OciUserPrincipalAuth(
                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
            )
        else:
            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")

    async def list_provider_model_ids(self) -> Iterable[str]:
        """
        List available models from OCI Generative AI service.

        Returns:
            Display names of ACTIVE models that have the ``CHAT`` capability,
            are not fine-tunable, and are neither deprecated nor retired;
            duplicates are dropped and first-seen order is kept.
        """
        oci_config = self._get_oci_config()
        oci_signer = self._get_oci_signer()
        compartment_id = self.config.oci_compartment_id or ""
        # Only pass a signer when one exists; config-file auth relies on the config alone.
        if oci_signer is None:
            client = GenerativeAiClient(config=oci_config)
        else:
            client = GenerativeAiClient(config=oci_config, signer=oci_signer)
        models: ModelCollection = client.list_models(
            compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE"
        ).data
        seen_models = set()
        model_ids = []
        for model in models.items:
            if model.time_deprecated or model.time_on_demand_retired:
                continue
            if "CHAT" not in model.capabilities or "FINE_TUNE" in model.capabilities:
                continue
            # Use display_name + model_type as the key to avoid conflicts
            model_key = (model.display_name, ModelType.llm)
            if model_key in seen_models:
                continue
            seen_models.add(model_key)
            model_ids.append(model.display_name)
        return model_ids

    async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse:
        """Embeddings are not supported by this provider.

        Raises:
            NotImplementedError: always.
        """
        # The constructed url is a mask that hits OCI's "chat" action, which is not supported for embeddings.
        raise NotImplementedError("OCI Provider does not (currently) support embeddings")
--- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
        self.vector_store_table = self.kvstore
        if isinstance(self.config, RemoteChromaVectorIOConfig):
            log.info(f"Connecting to Chroma server at: {self.config.url}")
@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        # Try to load from kvstore
-        if not vector_store:
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
        vector_store = VectorStore.model_validate_json(vector_store_data)
        collection = await maybe_await(self.client.get_collection(vector_store_id))
        if not collection:
            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
--- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        if self.vector_store_table is None:
+        # Try to load from kvstore
-            raise VectorStoreNotFoundError(vector_store_id)
+        if self.kvstore is None:
-
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+
-        if not vector_store:
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
--- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@ -368,6 +368,22 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
            log.exception("Could not connect to PGVector database server")
            raise RuntimeError("Could not connect to PGVector database server") from e
        # Load existing vector stores from KV store into cache
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
        for vector_store_data in stored_vector_stores:
            vector_store = VectorStore.model_validate_json(vector_store_data)
            pgvector_index = PGVectorIndex(
                vector_store=vector_store,
                dimension=vector_store.embedding_dimension,
                conn=self.conn,
                kvstore=self.kvstore,
            )
            await pgvector_index.initialize()
            index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
            self.cache[vector_store.identifier] = index
    async def shutdown(self) -> None:
        if self.conn is not None:
            self.conn.close()
@ -377,7 +393,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
    async def register_vector_store(self, vector_store: VectorStore) -> None:
        # Persist vector DB metadata in the KV store
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
        # Save to kvstore for persistence
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
        # Upsert model metadata in Postgres
        upsert_models(self.conn, [(vector_store.identifier, vector_store)])
@ -396,7 +418,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
            del self.cache[vector_store_id]
        # Delete vector DB metadata from KV store
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
        await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
@ -413,13 +436,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        if self.vector_store_table is None:
+        # Try to load from kvstore
-            raise VectorStoreNotFoundError(vector_store_id)
+        if self.kvstore is None:
-
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+
-        if not vector_store:
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
        await index.initialize()
        self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
--- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -183,7 +183,8 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        await super().shutdown()
    async def register_vector_store(self, vector_store: VectorStore) -> None:
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@ -200,20 +201,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]
-        assert self.kvstore is not None
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        if self.vector_store_table is None:
+        # Try to load from kvstore
-            raise ValueError(f"Vector DB not found {vector_store_id}")
+        if self.kvstore is None:
            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
-        if not vector_store:
+        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@ -346,13 +346,16 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
-        if self.vector_store_table is None:
+        # Try to load from kvstore
-            raise VectorStoreNotFoundError(vector_store_id)
+        if self.kvstore is None:
-
+            raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
-        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+
-        if not vector_store:
+        key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
        vector_store_data = await self.kvstore.get(key)
        if not vector_store_data:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = VectorStore.model_validate_json(vector_store_data)
        client = self._get_client()
        sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
        if not client.collections.exists(sanitized_collection_name):
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreContent,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
    VectorStoreFileLastError,
@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
        self,
        vector_store_id: str,
        file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file."""
        if vector_store_id not in self.openai_vector_stores:
            raise VectorStoreNotFoundError(vector_store_id)
        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
        dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
        chunks = [Chunk.model_validate(c) for c in dict_chunks]
        content = []
        for chunk in chunks:
            content.extend(self._chunk_to_vector_store_content(chunk))
-        return VectorStoreFileContentsResponse(
+        return VectorStoreFileContentResponse(
-            file_id=file_id,
+            object="vector_store.file_content.page",
-            filename=file_info.get("filename", ""),
+            data=content,
-            attributes=file_info.get("attributes", {}),
+            has_more=False,
-            content=content,
+            next_page=None,
        )
    async def openai_update_vector_store_file(
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -54,6 +54,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        # {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
        # or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
        "remote::groq",
        "remote::oci",
        "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
        "remote::anthropic",  # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
        "remote::azure",  # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@ -138,6 +138,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
        "remote::runpod",
        "remote::sambanova",
        "remote::tgi",
        "remote::oci",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
    )
    assert file_contents is not None
-    assert len(file_contents.content) == 1
+    assert file_contents.object == "vector_store.file_content.page"
-    content = file_contents.content[0]
+    assert len(file_contents.data) == 1
    content = file_contents.data[0]
    # llama-stack-client returns a model, openai-python is a badboy and returns a dict
    if not isinstance(content, dict):
        content = content.model_dump()
    assert content["type"] == "text"
    assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
+    assert file_contents.has_more is False
    assert file_contents.attributes == attributes
@vector_provider_wrapper
@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
        )
        assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
+        assert file_contents.object == "vector_store.file_content.page"
-        assert len(file_contents.content) > 0
+        assert len(file_contents.data) > 0
        # Verify the content matches what we uploaded
        content_text = (
-            file_contents.content[0].text
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
            if hasattr(file_contents.content[0], "text")
            else file_contents.content[0]["text"]
        )
        assert file_data[i][1].decode("utf-8") in content_text
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@ -92,6 +92,99 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
    await vector_io_adapter.shutdown()
 async def test_vector_store_lazy_loading_from_kvstore(vector_io_adapter):
    """
    A vector store dropped from the cache can still be looked up on demand.

    Registers a store, clears the adapter cache, then checks that the lookup
    helper returns the store, re-populates the cache, and serves the same
    object on a second lookup.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_id = f"lazy_load_test_{np.random.randint(1e6)}"
    await adapter.register_vector_store(
        VectorStore(
            identifier=store_id,
            provider_id="test_provider",
            embedding_model="test_model",
            embedding_dimension=128,
        )
    )
    assert store_id in adapter.cache

    # Empty the cache so the next lookup cannot be answered from memory.
    adapter.cache.clear()
    assert store_id not in adapter.cache

    first_lookup = await adapter._get_and_cache_vector_store_index(store_id)
    assert first_lookup is not None
    assert first_lookup.vector_store.identifier == store_id
    assert store_id in adapter.cache

    # A repeated lookup must return the very same cached object.
    second_lookup = await adapter._get_and_cache_vector_store_index(store_id)
    assert second_lookup is first_lookup

    await adapter.shutdown()
 async def test_vector_store_preloading_on_initialization(vector_io_adapter):
    """
    Registered vector stores are back in the cache after a restart.

    Registers three stores, shuts the adapter down, initializes it again, and
    checks that every store is in the cache and can be looked up.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_ids = [f"preload_test_{i}_{np.random.randint(1e6)}" for i in range(3)]
    for store_id in store_ids:
        await adapter.register_vector_store(
            VectorStore(
                identifier=store_id,
                provider_id="test_provider",
                embedding_model="test_model",
                embedding_dimension=128,
            )
        )
    for store_id in store_ids:
        assert store_id in adapter.cache

    # Restart the adapter.
    await adapter.shutdown()
    await adapter.initialize()

    for store_id in store_ids:
        assert store_id in adapter.cache
    for store_id in store_ids:
        reloaded = await adapter._get_and_cache_vector_store_index(store_id)
        assert reloaded is not None
        assert reloaded.vector_store.identifier == store_id

    await adapter.shutdown()
 async def test_kvstore_none_raises_runtime_error(vector_io_adapter):
    """
    Looking up a vector store with ``kvstore`` set to None raises RuntimeError.

    The cache is cleared first so the lookup cannot be answered from memory.
    """
    adapter = vector_io_adapter
    await adapter.initialize()

    store_id = f"kvstore_none_test_{np.random.randint(1e6)}"
    await adapter.register_vector_store(
        VectorStore(
            identifier=store_id,
            provider_id="test_provider",
            embedding_model="test_model",
            embedding_dimension=128,
        )
    )

    # Remove both the cached entry and the backing store before the lookup.
    adapter.cache.clear()
    adapter.kvstore = None

    with pytest.raises(RuntimeError, match="KVStore not initialized"):
        await adapter._get_and_cache_vector_store_index(store_id)
 async def test_register_and_unregister_vector_store(vector_io_adapter):
    unique_id = f"foo_db_{np.random.randint(1e6)}"
    dummy = VectorStore(