diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 96de04ec9..ce47f8ebb 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -3240,6 +3240,59 @@
}
}
},
+ "/v1/openai/v1/vector_stores/{vector_store_id}/files": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "A VectorStoreFileObject representing the attached file.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorStoreFileObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "description": "Attach a file to a vector store.",
+ "parameters": [
+ {
+ "name": "vector_store_id",
+ "in": "path",
+ "description": "The ID of the vector store to attach the file to.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenaiAttachFileToVectorStoreRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/openai/v1/completions": {
"post": {
"responses": {
@@ -7047,6 +7100,9 @@
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
},
@@ -7193,12 +7249,41 @@
"const": "file_search",
"default": "file_search"
},
- "vector_store_id": {
+ "vector_store_ids": {
"type": "array",
"items": {
"type": "string"
}
},
+ "filters": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "max_num_results": {
+ "type": "integer",
+ "default": 10
+ },
"ranking_options": {
"type": "object",
"properties": {
@@ -7217,7 +7302,7 @@
"additionalProperties": false,
"required": [
"type",
- "vector_store_id"
+ "vector_store_ids"
],
"title": "OpenAIResponseInputToolFileSearch"
},
@@ -7484,6 +7569,64 @@
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
+ "OpenAIResponseOutputMessageFileSearchToolCall": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "queries": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "status": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "file_search_call",
+ "default": "file_search_call"
+ },
+ "results": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "queries",
+ "status",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessageFileSearchToolCall"
+ },
"OpenAIResponseOutputMessageFunctionToolCall": {
"type": "object",
"properties": {
@@ -7760,6 +7903,9 @@
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
},
@@ -7775,6 +7921,7 @@
"mapping": {
"message": "#/components/schemas/OpenAIResponseMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
+ "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
"mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
@@ -11766,6 +11913,232 @@
],
"title": "LogEventRequest"
},
+ "VectorStoreChunkingStrategy": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto"
+ },
+ {
+ "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto",
+ "static": "#/components/schemas/VectorStoreChunkingStrategyStatic"
+ }
+ }
+ },
+ "VectorStoreChunkingStrategyAuto": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "auto",
+ "default": "auto"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "VectorStoreChunkingStrategyAuto"
+ },
+ "VectorStoreChunkingStrategyStatic": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "static",
+ "default": "static"
+ },
+ "static": {
+ "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "static"
+ ],
+ "title": "VectorStoreChunkingStrategyStatic"
+ },
+ "VectorStoreChunkingStrategyStaticConfig": {
+ "type": "object",
+ "properties": {
+ "chunk_overlap_tokens": {
+ "type": "integer",
+ "default": 400
+ },
+ "max_chunk_size_tokens": {
+ "type": "integer",
+ "default": 800
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "chunk_overlap_tokens",
+ "max_chunk_size_tokens"
+ ],
+ "title": "VectorStoreChunkingStrategyStaticConfig"
+ },
+ "OpenaiAttachFileToVectorStoreRequest": {
+ "type": "object",
+ "properties": {
+ "file_id": {
+ "type": "string",
+ "description": "The ID of the file to attach to the vector store."
+ },
+ "attributes": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ },
+ "description": "The key-value attributes stored with the file, which can be used for filtering."
+ },
+ "chunking_strategy": {
+ "$ref": "#/components/schemas/VectorStoreChunkingStrategy",
+ "description": "The chunking strategy to use for the file."
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "file_id"
+ ],
+ "title": "OpenaiAttachFileToVectorStoreRequest"
+ },
+ "VectorStoreFileLastError": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "server_error"
+ },
+ {
+ "type": "string",
+ "const": "rate_limit_exceeded"
+ }
+ ]
+ },
+ "message": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "code",
+ "message"
+ ],
+ "title": "VectorStoreFileLastError"
+ },
+ "VectorStoreFileObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string",
+ "default": "vector_store.file"
+ },
+ "attributes": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "chunking_strategy": {
+ "$ref": "#/components/schemas/VectorStoreChunkingStrategy"
+ },
+ "created_at": {
+ "type": "integer"
+ },
+ "last_error": {
+ "$ref": "#/components/schemas/VectorStoreFileLastError"
+ },
+ "status": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "completed"
+ },
+ {
+ "type": "string",
+ "const": "in_progress"
+ },
+ {
+ "type": "string",
+ "const": "cancelled"
+ },
+ {
+ "type": "string",
+ "const": "failed"
+ }
+ ]
+ },
+ "usage_bytes": {
+ "type": "integer",
+ "default": 0
+ },
+ "vector_store_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "object",
+ "attributes",
+ "chunking_strategy",
+ "created_at",
+ "status",
+ "usage_bytes",
+ "vector_store_id"
+ ],
+ "title": "VectorStoreFileObject",
+ "description": "OpenAI Vector Store File object."
+ },
"OpenAIJSONSchema": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index b2fe870be..07a176b32 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2263,6 +2263,43 @@ paths:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
+ /v1/openai/v1/vector_stores/{vector_store_id}/files:
+ post:
+ responses:
+ '200':
+ description: >-
+ A VectorStoreFileObject representing the attached file.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorStoreFileObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - VectorIO
+ description: Attach a file to a vector store.
+ parameters:
+ - name: vector_store_id
+ in: path
+ description: >-
+ The ID of the vector store to attach the file to.
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
+ required: true
/v1/openai/v1/completions:
post:
responses:
@@ -5021,6 +5058,7 @@ components:
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMessage'
@@ -5115,10 +5153,23 @@ components:
type: string
const: file_search
default: file_search
- vector_store_id:
+ vector_store_ids:
type: array
items:
type: string
+ filters:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ max_num_results:
+ type: integer
+ default: 10
ranking_options:
type: object
properties:
@@ -5132,7 +5183,7 @@ components:
additionalProperties: false
required:
- type
- - vector_store_id
+ - vector_store_ids
title: OpenAIResponseInputToolFileSearch
OpenAIResponseInputToolFunction:
type: object
@@ -5294,6 +5345,41 @@ components:
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
+ "OpenAIResponseOutputMessageFileSearchToolCall":
+ type: object
+ properties:
+ id:
+ type: string
+ queries:
+ type: array
+ items:
+ type: string
+ status:
+ type: string
+ type:
+ type: string
+ const: file_search_call
+ default: file_search_call
+ results:
+ type: array
+ items:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - id
+ - queries
+ - status
+ - type
+ title: >-
+ OpenAIResponseOutputMessageFileSearchToolCall
"OpenAIResponseOutputMessageFunctionToolCall":
type: object
properties:
@@ -5491,6 +5577,7 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
@@ -5499,6 +5586,7 @@ components:
mapping:
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
@@ -8251,6 +8339,148 @@ components:
- event
- ttl_seconds
title: LogEventRequest
+ VectorStoreChunkingStrategy:
+ oneOf:
+ - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+ - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+ discriminator:
+ propertyName: type
+ mapping:
+ auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+ static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+ VectorStoreChunkingStrategyAuto:
+ type: object
+ properties:
+ type:
+ type: string
+ const: auto
+ default: auto
+ additionalProperties: false
+ required:
+ - type
+ title: VectorStoreChunkingStrategyAuto
+ VectorStoreChunkingStrategyStatic:
+ type: object
+ properties:
+ type:
+ type: string
+ const: static
+ default: static
+ static:
+ $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
+ additionalProperties: false
+ required:
+ - type
+ - static
+ title: VectorStoreChunkingStrategyStatic
+ VectorStoreChunkingStrategyStaticConfig:
+ type: object
+ properties:
+ chunk_overlap_tokens:
+ type: integer
+ default: 400
+ max_chunk_size_tokens:
+ type: integer
+ default: 800
+ additionalProperties: false
+ required:
+ - chunk_overlap_tokens
+ - max_chunk_size_tokens
+ title: VectorStoreChunkingStrategyStaticConfig
+ OpenaiAttachFileToVectorStoreRequest:
+ type: object
+ properties:
+ file_id:
+ type: string
+ description: >-
+ The ID of the file to attach to the vector store.
+ attributes:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ description: >-
+ The key-value attributes stored with the file, which can be used for filtering.
+ chunking_strategy:
+ $ref: '#/components/schemas/VectorStoreChunkingStrategy'
+ description: >-
+ The chunking strategy to use for the file.
+ additionalProperties: false
+ required:
+ - file_id
+ title: OpenaiAttachFileToVectorStoreRequest
+ VectorStoreFileLastError:
+ type: object
+ properties:
+ code:
+ oneOf:
+ - type: string
+ const: server_error
+ - type: string
+ const: rate_limit_exceeded
+ message:
+ type: string
+ additionalProperties: false
+ required:
+ - code
+ - message
+ title: VectorStoreFileLastError
+ VectorStoreFileObject:
+ type: object
+ properties:
+ id:
+ type: string
+ object:
+ type: string
+ default: vector_store.file
+ attributes:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ chunking_strategy:
+ $ref: '#/components/schemas/VectorStoreChunkingStrategy'
+ created_at:
+ type: integer
+ last_error:
+ $ref: '#/components/schemas/VectorStoreFileLastError'
+ status:
+ oneOf:
+ - type: string
+ const: completed
+ - type: string
+ const: in_progress
+ - type: string
+ const: cancelled
+ - type: string
+ const: failed
+ usage_bytes:
+ type: integer
+ default: 0
+ vector_store_id:
+ type: string
+ additionalProperties: false
+ required:
+ - id
+ - object
+ - attributes
+ - chunking_strategy
+ - created_at
+ - status
+ - usage_bytes
+ - vector_store_id
+ title: VectorStoreFileObject
+ description: OpenAI Vector Store File object.
OpenAIJSONSchema:
type: object
properties:
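
The new `/v1/openai/v1/vector_stores/{vector_store_id}/files` endpoint mirrors OpenAI's vector store files API, so the stock OpenAI Python client can drive it once pointed at a Llama Stack server. A minimal sketch (the base URL, API key, store ID, and file ID are placeholders, not values from this change):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholder URL/key

    vs_file = client.vector_stores.files.create(
        vector_store_id="vs_123",  # hypothetical IDs
        file_id="file_456",
        chunking_strategy={
            "type": "static",
            "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400},
        },
    )
    print(vs_file.status)  # "in_progress" initially; poll until "completed" or "failed"
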
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 4d148feda..e09c79359 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -18,6 +18,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
+| files | `inline::localfs` |
| inference | `remote::ollama` |
| post_training | `inline::huggingface` |
| safety | `inline::llama-guard` |
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 35b3d5ace..2e1cb257a 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -81,6 +81,15 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
type: Literal["web_search_call"] = "web_search_call"
+@json_schema_type
+class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
+ id: str
+ queries: list[str]
+ status: str
+ type: Literal["file_search_call"] = "file_search_call"
+ results: list[dict[str, Any]] | None = None
+
+
@json_schema_type
class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
call_id: str
@@ -119,6 +128,7 @@ class OpenAIResponseOutputMessageMCPListTools(BaseModel):
OpenAIResponseOutput = Annotated[
OpenAIResponseMessage
| OpenAIResponseOutputMessageWebSearchToolCall
+ | OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseOutputMessageMCPCall
| OpenAIResponseOutputMessageMCPListTools,
@@ -362,6 +372,7 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
OpenAIResponseInput = Annotated[
# Responses API allows output messages to be passed in as input
OpenAIResponseOutputMessageWebSearchToolCall
+ | OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseInputFunctionToolCallOutput
|
@@ -397,9 +408,10 @@ class FileSearchRankingOptions(BaseModel):
@json_schema_type
class OpenAIResponseInputToolFileSearch(BaseModel):
type: Literal["file_search"] = "file_search"
- vector_store_id: list[str]
+ vector_store_ids: list[str]
+ filters: dict[str, Any] | None = None
+ max_num_results: int | None = Field(default=10, ge=1, le=50)
ranking_options: FileSearchRankingOptions | None = None
- # TODO: add filters
class ApprovalFilter(BaseModel):
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 1c8ae4dab..77d4cfc5a 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -8,7 +8,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, Literal, Protocol, runtime_checkable
+from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
@@ -16,6 +16,7 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack.strong_typing.schema import register_schema
class Chunk(BaseModel):
@@ -133,6 +134,50 @@ class VectorStoreDeleteResponse(BaseModel):
deleted: bool = True
+@json_schema_type
+class VectorStoreChunkingStrategyAuto(BaseModel):
+ type: Literal["auto"] = "auto"
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStaticConfig(BaseModel):
+ chunk_overlap_tokens: int = 400
+ max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStatic(BaseModel):
+ type: Literal["static"] = "static"
+ static: VectorStoreChunkingStrategyStaticConfig
+
+
+VectorStoreChunkingStrategy = Annotated[
+ VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic, Field(discriminator="type")
+]
+register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
+
+
+@json_schema_type
+class VectorStoreFileLastError(BaseModel):
+ code: Literal["server_error"] | Literal["rate_limit_exceeded"]
+ message: str
+
+
+@json_schema_type
+class VectorStoreFileObject(BaseModel):
+ """OpenAI Vector Store File object."""
+
+ id: str
+ object: str = "vector_store.file"
+ attributes: dict[str, Any] = Field(default_factory=dict)
+ chunking_strategy: VectorStoreChunkingStrategy
+ created_at: int
+ last_error: VectorStoreFileLastError | None = None
+ status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
+ usage_bytes: int = 0
+ vector_store_id: str
+
+
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
@@ -290,3 +335,21 @@ class VectorIO(Protocol):
:returns: A VectorStoreSearchResponse containing the search results.
"""
...
+
+ @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ """Attach a file to a vector store.
+
+ :param vector_store_id: The ID of the vector store to attach the file to.
+ :param file_id: The ID of the file to attach to the vector store.
+ :param attributes: The key-value attributes stored with the file, which can be used for filtering.
+ :param chunking_strategy: The chunking strategy to use for the file.
+ :returns: A VectorStoreFileObject representing the attached file.
+ """
+ ...
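
`VectorStoreChunkingStrategy` is a discriminated union on `type`, so request bodies deserialize to the right model automatically. A quick illustration, assuming pydantic v2 as used elsewhere in the repo:

    from pydantic import TypeAdapter

    from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy

    adapter = TypeAdapter(VectorStoreChunkingStrategy)
    strategy = adapter.validate_python(
        {"type": "static", "static": {"max_chunk_size_tokens": 512, "chunk_overlap_tokens": 100}}
    )
    print(type(strategy).__name__)  # VectorStoreChunkingStrategyStatic
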
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py
index 3d65aef24..8eb56b7ca 100644
--- a/llama_stack/distribution/routers/vector_io.py
+++ b/llama_stack/distribution/routers/vector_io.py
@@ -19,6 +19,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
+from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import RoutingTable
@@ -254,3 +255,20 @@ class VectorIORouter(VectorIO):
ranking_options=ranking_options,
rewrite_query=rewrite_query,
)
+
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
+ # Route based on vector store ID
+ provider = self.routing_table.get_provider_impl(vector_store_id)
+ return await provider.openai_attach_file_to_vector_store(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ attributes=attributes,
+ chunking_strategy=chunking_strategy,
+ )
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 0ff6dc2c5..33fcbfa5d 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -24,6 +24,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
+ OpenAIResponseInputToolFileSearch,
OpenAIResponseInputToolMCP,
OpenAIResponseMessage,
OpenAIResponseObject,
@@ -34,6 +35,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutput,
OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText,
+ OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
@@ -62,7 +64,7 @@ from llama_stack.apis.inference.inference import (
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
-from llama_stack.apis.tools.tools import ToolGroups, ToolRuntime
+from llama_stack.apis.tools import RAGQueryConfig, ToolGroups, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
@@ -198,7 +200,8 @@ class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
class ChatCompletionContext(BaseModel):
model: str
messages: list[OpenAIMessageParam]
- tools: list[ChatCompletionToolParam] | None = None
+ response_tools: list[OpenAIResponseInputTool] | None = None
+ chat_tools: list[ChatCompletionToolParam] | None = None
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP]
temperature: float | None
response_format: OpenAIResponseFormatParam
@@ -388,7 +391,8 @@ class OpenAIResponsesImpl:
ctx = ChatCompletionContext(
model=model,
messages=messages,
- tools=chat_tools,
+ response_tools=tools,
+ chat_tools=chat_tools,
mcp_tool_to_server=mcp_tool_to_server,
temperature=temperature,
response_format=response_format,
@@ -417,7 +421,7 @@ class OpenAIResponsesImpl:
completion_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,
- tools=ctx.tools,
+ tools=ctx.chat_tools,
stream=True,
temperature=ctx.temperature,
response_format=ctx.response_format,
@@ -606,6 +610,12 @@ class OpenAIResponsesImpl:
if not tool:
raise ValueError(f"Tool {tool_name} not found")
chat_tools.append(make_openai_tool(tool_name, tool))
+ elif input_tool.type == "file_search":
+ tool_name = "knowledge_search"
+ tool = await self.tool_groups_api.get_tool(tool_name)
+ if not tool:
+ raise ValueError(f"Tool {tool_name} not found")
+ chat_tools.append(make_openai_tool(tool_name, tool))
elif input_tool.type == "mcp":
always_allowed = None
never_allowed = None
@@ -667,6 +677,7 @@ class OpenAIResponsesImpl:
tool_call_id = tool_call.id
function = tool_call.function
+        tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
if not function or not tool_call_id or not function.name:
return None, None
@@ -680,12 +691,26 @@ class OpenAIResponsesImpl:
endpoint=mcp_tool.server_url,
headers=mcp_tool.headers or {},
tool_name=function.name,
- kwargs=json.loads(function.arguments) if function.arguments else {},
+ kwargs=tool_kwargs,
)
else:
+ if function.name == "knowledge_search":
+                response_file_search_tool = next(
+                    (t for t in ctx.response_tools or [] if isinstance(t, OpenAIResponseInputToolFileSearch)), None
+                )
+ if response_file_search_tool:
+ if response_file_search_tool.filters:
+ logger.warning("Filters are not yet supported for file_search tool")
+ if response_file_search_tool.ranking_options:
+ logger.warning("Ranking options are not yet supported for file_search tool")
+ tool_kwargs["vector_db_ids"] = response_file_search_tool.vector_store_ids
+ tool_kwargs["query_config"] = RAGQueryConfig(
+ mode="vector",
+ max_chunks=response_file_search_tool.max_num_results,
+ )
result = await self.tool_runtime_api.invoke_tool(
tool_name=function.name,
- kwargs=json.loads(function.arguments) if function.arguments else {},
+ kwargs=tool_kwargs,
)
except Exception as e:
error_exc = e
@@ -713,6 +738,27 @@ class OpenAIResponsesImpl:
)
if error_exc or (result.error_code and result.error_code > 0) or result.error_message:
message.status = "failed"
+ elif function.name == "knowledge_search":
+ message = OpenAIResponseOutputMessageFileSearchToolCall(
+ id=tool_call_id,
+ queries=[tool_kwargs.get("query", "")],
+ status="completed",
+ )
+                if result.metadata and "document_ids" in result.metadata:
+ message.results = []
+ for i, doc_id in enumerate(result.metadata["document_ids"]):
+ text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
+ score = result.metadata["scores"][i] if "scores" in result.metadata else None
+ message.results.append(
+ {
+ "file_id": doc_id,
+ "filename": doc_id,
+ "text": text,
+ "score": score,
+ }
+ )
+ if error_exc or (result.error_code and result.error_code > 0) or result.error_message:
+ message.status = "failed"
else:
raise ValueError(f"Unknown tool {function.name} called")
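
End to end, the `file_search` tool is exercised through the Responses API the same way the verification tests below do. A sketch (base URL, model name, and vector store ID are placeholders):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholder
    response = client.responses.create(
        model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model
        input="How many experts does the Llama 4 Maverick model have?",
        tools=[{"type": "file_search", "vector_store_ids": ["vs_123"], "max_num_results": 5}],
        include=["file_search_call.results"],
    )
    for item in response.output:
        if item.type == "file_search_call":
            print(item.queries, item.status)
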
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py
index 4776d47d0..e15d067a7 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -170,6 +170,8 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
content=picked,
metadata={
"document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]],
+ "chunks": [c.content for c in chunks[: len(picked)]],
+ "scores": scores[: len(picked)],
},
)
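
With `chunks` and `scores` added alongside `document_ids`, the responses implementation above has everything it needs to build `file_search_call` results. Roughly, it pairs the three parallel lists (the values here are made up):

    # Illustrative ToolInvocationResult.metadata after this change:
    metadata = {
        "document_ids": ["file-abc123"],
        "chunks": ["Llama 4 Maverick has 128 experts"],
        "scores": [0.87],
    }
    results = [
        {"file_id": d, "filename": d, "text": t, "score": s}
        for d, t, s in zip(metadata["document_ids"], metadata["chunks"], metadata["scores"])
    ]
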
diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py
index 68a1dee66..dd1c59b7b 100644
--- a/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -16,6 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
- impl = FaissVectorIOAdapter(config, deps[Api.inference])
+ impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
await impl.initialize()
return impl
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 5e9155011..afb911726 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -15,6 +15,7 @@ import faiss
import numpy as np
from numpy.typing import NDArray
+from llama_stack.apis.files import Files
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.inference.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
@@ -132,9 +133,10 @@ class FaissIndex(EmbeddingIndex):
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
- def __init__(self, config: FaissVectorIOConfig, inference_api: Inference) -> None:
+ def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
self.config = config
self.inference_api = inference_api
+ self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.kvstore: KVStore | None = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index 6db176eda..e5200a755 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -15,6 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
from .sqlite_vec import SQLiteVecVectorIOAdapter
assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
- impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference])
+ impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
await impl.initialize()
return impl
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 02f04e766..f69cf8a32 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -17,6 +17,7 @@ import numpy as np
import sqlite_vec
from numpy.typing import NDArray
+from llama_stack.apis.files.files import Files
from llama_stack.apis.inference.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
@@ -301,9 +302,10 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
"""
- def __init__(self, config, inference_api: Inference) -> None:
+ def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
self.config = config
self.inference_api = inference_api
+ self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py
index d888c8420..55c1b5617 100644
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@@ -24,6 +24,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
deprecation_warning="Please use the `inline::faiss` provider instead.",
api_dependencies=[Api.inference],
+ optional_api_dependencies=[Api.files],
),
InlineProviderSpec(
api=Api.vector_io,
@@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
api_dependencies=[Api.inference],
+ optional_api_dependencies=[Api.files],
),
# NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
# source distribution and the wheels are not available for all platforms.
@@ -42,6 +44,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.vector_io.sqlite_vec",
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
api_dependencies=[Api.inference],
+ optional_api_dependencies=[Api.files],
),
InlineProviderSpec(
api=Api.vector_io,
@@ -51,6 +54,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
api_dependencies=[Api.inference],
+ optional_api_dependencies=[Api.files],
),
remote_provider_spec(
Api.vector_io,
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 0d8451eb2..fee29cfd9 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
+from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@@ -241,3 +242,12 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
rewrite_query: bool | None = False,
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
+
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 8ae74aedc..51c541c02 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
+from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@@ -240,6 +241,15 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")
+
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text."""
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 10f3b5b0d..1631a7a2a 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
+from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@@ -241,3 +242,12 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
rewrite_query: bool | None = False,
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 7d8163ed2..f9701897a 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -5,11 +5,13 @@
# the root directory of this source tree.
import logging
+import mimetypes
import time
import uuid
from abc import ABC, abstractmethod
from typing import Any
+from llama_stack.apis.files import Files
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
QueryChunksResponse,
@@ -20,6 +22,15 @@ from llama_stack.apis.vector_io import (
VectorStoreSearchResponse,
VectorStoreSearchResponsePage,
)
+from llama_stack.apis.vector_io.vector_io import (
+ Chunk,
+ VectorStoreChunkingStrategy,
+ VectorStoreChunkingStrategyAuto,
+ VectorStoreChunkingStrategyStatic,
+ VectorStoreFileLastError,
+ VectorStoreFileObject,
+)
+from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
logger = logging.getLogger(__name__)
@@ -36,6 +47,7 @@ class OpenAIVectorStoreMixin(ABC):
# These should be provided by the implementing class
openai_vector_stores: dict[str, dict[str, Any]]
+ files_api: Files | None
@abstractmethod
async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
@@ -67,6 +79,16 @@ class OpenAIVectorStoreMixin(ABC):
"""Unregister a vector database (provider-specific implementation)."""
pass
+ @abstractmethod
+ async def insert_chunks(
+ self,
+ vector_db_id: str,
+ chunks: list[Chunk],
+ ttl_seconds: int | None = None,
+ ) -> None:
+ """Insert chunks into a vector database (provider-specific implementation)."""
+ pass
+
@abstractmethod
async def query_chunks(
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
@@ -383,3 +405,78 @@ class OpenAIVectorStoreMixin(ABC):
if metadata[key] != value:
return False
return True
+
+ async def openai_attach_file_to_vector_store(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ attributes: dict[str, Any] | None = None,
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
+ ) -> VectorStoreFileObject:
+ attributes = attributes or {}
+ chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
+
+ vector_store_file_object = VectorStoreFileObject(
+ id=file_id,
+ attributes=attributes,
+ chunking_strategy=chunking_strategy,
+ created_at=int(time.time()),
+ status="in_progress",
+ vector_store_id=vector_store_id,
+ )
+
+ if not hasattr(self, "files_api") or not self.files_api:
+ vector_store_file_object.status = "failed"
+ vector_store_file_object.last_error = VectorStoreFileLastError(
+ code="server_error",
+ message="Files API is not available",
+ )
+ return vector_store_file_object
+
+ if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
+ max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
+ chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
+ else:
+ # Default values from OpenAI API spec
+ max_chunk_size_tokens = 800
+ chunk_overlap_tokens = 400
+
+ try:
+ file_response = await self.files_api.openai_retrieve_file(file_id)
+ mime_type, _ = mimetypes.guess_type(file_response.filename)
+ content_response = await self.files_api.openai_retrieve_file_content(file_id)
+
+ content = content_from_data_and_mime_type(content_response.body, mime_type)
+
+ chunks = make_overlapped_chunks(
+ file_id,
+ content,
+ max_chunk_size_tokens,
+ chunk_overlap_tokens,
+ attributes,
+ )
+
+ if not chunks:
+ vector_store_file_object.status = "failed"
+ vector_store_file_object.last_error = VectorStoreFileLastError(
+ code="server_error",
+ message="No chunks were generated from the file",
+ )
+ return vector_store_file_object
+
+ await self.insert_chunks(
+ vector_db_id=vector_store_id,
+ chunks=chunks,
+ )
+ except Exception as e:
+ logger.error(f"Error attaching file to vector store: {e}")
+ vector_store_file_object.status = "failed"
+ vector_store_file_object.last_error = VectorStoreFileLastError(
+ code="server_error",
+ message=str(e),
+ )
+ return vector_store_file_object
+
+ vector_store_file_object.status = "completed"
+
+ return vector_store_file_object
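
Concrete adapters get `openai_attach_file_to_vector_store` for free from the mixin as long as they expose a `files_api` attribute and implement `insert_chunks` (plus the mixin's other abstract methods). A skeletal, illustrative sketch of just those two pieces, not a real provider:

    from typing import Any

    from llama_stack.apis.files import Files
    from llama_stack.apis.vector_io.vector_io import Chunk
    from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin

    class ToyVectorIOAdapter(OpenAIVectorStoreMixin):  # other abstract methods omitted for brevity
        def __init__(self, files_api: Files | None) -> None:
            self.files_api = files_api  # consumed by openai_attach_file_to_vector_store
            self.openai_vector_stores: dict[str, dict[str, Any]] = {}
            self._chunks: dict[str, list[Chunk]] = {}

        async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
            self._chunks.setdefault(vector_db_id, []).extend(chunks)
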
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index 4cd15860b..2c0c7c8e9 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -72,16 +72,18 @@ def content_from_data(data_url: str) -> str:
data = unquote(data)
encoding = parts["encoding"] or "utf-8"
data = data.encode(encoding)
+ return content_from_data_and_mime_type(data, parts["mimetype"], parts.get("encoding", None))
- encoding = parts["encoding"]
- if not encoding:
- import chardet
- detected = chardet.detect(data)
- encoding = detected["encoding"]
+def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, encoding: str | None = None) -> str:
+ if isinstance(data, bytes):
+ if not encoding:
+ import chardet
- mime_type = parts["mimetype"]
- mime_category = mime_type.split("/")[0]
+ detected = chardet.detect(data)
+ encoding = detected["encoding"]
+
+ mime_category = mime_type.split("/")[0] if mime_type else None
if mime_category == "text":
# For text-based files (including CSV, MD)
return data.decode(encoding)
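
The refactored helper lets the mixin pass raw bytes plus a MIME type guessed from the filename, instead of requiring a data URL. For example (assuming `chardet` is installed, which the original code already relied on):

    import mimetypes

    from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type

    mime_type, _ = mimetypes.guess_type("notes.txt")  # "text/plain"
    text = content_from_data_and_mime_type(b"hello from a vector store file\n", mime_type)
    print(text)
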
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 36a120897..ebe0849f3 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -23,6 +23,8 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
+ files:
+ - inline::localfs
post_training:
- inline::huggingface
tool_runtime:
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 0b4f05128..46c4852a4 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
ToolGroupInput,
)
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
@@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+ "files": ["inline::localfs"],
"post_training": ["inline::huggingface"],
"tool_runtime": [
"remote::brave-search",
@@ -49,6 +51,11 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
+ files_provider = Provider(
+ provider_id="meta-reference-files",
+ provider_type="inline::localfs",
+ config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+ )
posttraining_provider = Provider(
provider_id="huggingface",
provider_type="inline::huggingface",
@@ -98,6 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider],
"vector_io": [vector_io_provider_faiss],
+ "files": [files_provider],
"post_training": [posttraining_provider],
},
default_models=[inference_model, embedding_model],
@@ -107,6 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider],
"vector_io": [vector_io_provider_faiss],
+ "files": [files_provider],
"post_training": [posttraining_provider],
"safety": [
Provider(
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 7bf9fc3bd..85d5c813b 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
+- files
- inference
- post_training
- safety
@@ -84,6 +85,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
+ files:
+ - provider_id: meta-reference-files
+ provider_type: inline::localfs
+ config:
+ storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+ metadata_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 0030bcd60..2d10a99a4 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
+- files
- inference
- post_training
- safety
@@ -82,6 +83,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
+ files:
+ - provider_id: meta-reference-files
+ provider_type: inline::localfs
+ config:
+ storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files}
+ metadata_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml
index 5fd3cc3f5..9bf4913a7 100644
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@@ -17,6 +17,8 @@ distribution_spec:
- inline::sqlite-vec
- remote::chromadb
- remote::pgvector
+ files:
+ - inline::localfs
safety:
- inline::llama-guard
agents:
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index 4732afa77..319ababe5 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
+- files
- inference
- safety
- scoring
@@ -75,6 +76,14 @@ providers:
db: ${env.PGVECTOR_DB:}
user: ${env.PGVECTOR_USER:}
password: ${env.PGVECTOR_PASSWORD:}
+ files:
+ - provider_id: meta-reference-files
+ provider_type: inline::localfs
+ config:
+ storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files}
+ metadata_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py
index 650ecc87f..2a44a0a37 100644
--- a/llama_stack/templates/starter/starter.py
+++ b/llama_stack/templates/starter/starter.py
@@ -12,6 +12,7 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
ToolGroupInput,
)
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@@ -134,6 +135,7 @@ def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
"vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
+ "files": ["inline::localfs"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
@@ -170,6 +172,11 @@ def get_distribution_template() -> DistributionTemplate:
),
),
]
+ files_provider = Provider(
+ provider_id="meta-reference-files",
+ provider_type="inline::localfs",
+ config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+ )
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
@@ -212,6 +219,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
+ "files": [files_provider],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
diff --git a/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf b/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf
new file mode 100644
index 000000000..25579f425
Binary files /dev/null and b/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf differ
diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
index 4d6c19b59..1acf06388 100644
--- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@@ -31,6 +31,25 @@ test_response_web_search:
search_context_size: "low"
output: "128"
+test_response_file_search:
+ test_name: test_response_file_search
+ test_params:
+ case:
+ - case_id: "llama_experts"
+ input: "How many experts does the Llama 4 Maverick model have?"
+ tools:
+ - type: file_search
+ # vector_store_ids param for file_search tool gets added by the test runner
+ file_content: "Llama 4 Maverick has 128 experts"
+ output: "128"
+ - case_id: "llama_experts_pdf"
+ input: "How many experts does the Llama 4 Maverick model have?"
+ tools:
+ - type: file_search
+      # vector_store_ids param for file_search tool gets added by the test runner
+ file_path: "pdfs/llama_stack_and_models.pdf"
+ output: "128"
+
test_response_mcp_tool:
test_name: test_response_mcp_tool
test_params:
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index 28020d3b1..1c9cdaa3a 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -5,6 +5,8 @@
# the root directory of this source tree.
import json
+import os
+import time
import httpx
import openai
@@ -23,6 +25,31 @@ from tests.verifications.openai_api.fixtures.load import load_test_cases
responses_test_cases = load_test_cases("responses")
+def _new_vector_store(openai_client, name):
+ # Ensure we don't reuse an existing vector store
+ vector_stores = openai_client.vector_stores.list()
+ for vector_store in vector_stores:
+ if vector_store.name == name:
+ openai_client.vector_stores.delete(vector_store_id=vector_store.id)
+
+ # Create a new vector store
+ vector_store = openai_client.vector_stores.create(
+ name=name,
+ )
+ return vector_store
+
+
+def _upload_file(openai_client, name, file_path):
+ # Ensure we don't reuse an existing file
+ files = openai_client.files.list()
+ for file in files:
+ if file.filename == name:
+ openai_client.files.delete(file_id=file.id)
+
+ # Upload a text file with our document content
+    with open(file_path, "rb") as f:
+        return openai_client.files.create(file=f, purpose="assistants")
+
+
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
@@ -258,6 +285,111 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
assert case["output"].lower() in response.output_text.lower().strip()
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_file_search"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_file_search(
+ request, openai_client, model, provider, verification_config, tmp_path, case
+):
+ if isinstance(openai_client, LlamaStackAsLibraryClient):
+ pytest.skip("Responses API file search is not yet supported in library client.")
+
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ vector_store = _new_vector_store(openai_client, "test_vector_store")
+
+ if "file_content" in case:
+ file_name = "test_response_non_streaming_file_search.txt"
+ file_path = tmp_path / file_name
+ file_path.write_text(case["file_content"])
+ elif "file_path" in case:
+ file_path = os.path.join(os.path.dirname(__file__), "fixtures", case["file_path"])
+ file_name = os.path.basename(file_path)
+ else:
+ raise ValueError(f"No file content or path provided for case {case['case_id']}")
+
+ file_response = _upload_file(openai_client, file_name, file_path)
+
+ # Attach our file to the vector store
+ file_attach_response = openai_client.vector_stores.files.create(
+ vector_store_id=vector_store.id,
+ file_id=file_response.id,
+ )
+
+ # Wait for the file to be attached
+ while file_attach_response.status == "in_progress":
+ time.sleep(0.1)
+ file_attach_response = openai_client.vector_stores.files.retrieve(
+ vector_store_id=vector_store.id,
+ file_id=file_response.id,
+ )
+ assert file_attach_response.status == "completed", f"Expected file to be attached, got {file_attach_response}"
+ assert not file_attach_response.last_error
+
+ # Update our tools with the right vector store id
+ tools = case["tools"]
+ for tool in tools:
+ if tool["type"] == "file_search":
+ tool["vector_store_ids"] = [vector_store.id]
+
+ # Create the response request, which should query our vector store
+ response = openai_client.responses.create(
+ model=model,
+ input=case["input"],
+ tools=tools,
+ stream=False,
+ include=["file_search_call.results"],
+ )
+
+ # Verify the file_search_tool was called
+ assert len(response.output) > 1
+ assert response.output[0].type == "file_search_call"
+ assert response.output[0].status == "completed"
+ assert response.output[0].queries # ensure it's some non-empty list
+ assert response.output[0].results
+ assert case["output"].lower() in response.output[0].results[0].text.lower()
+ assert response.output[0].results[0].score > 0
+
+ # Verify the output_text generated by the response
+ assert case["output"].lower() in response.output_text.lower().strip()
+
+
+def test_response_non_streaming_file_search_empty_vector_store(
+ request, openai_client, model, provider, verification_config
+):
+ if isinstance(openai_client, LlamaStackAsLibraryClient):
+ pytest.skip("Responses API file search is not yet supported in library client.")
+
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ vector_store = _new_vector_store(openai_client, "test_vector_store")
+
+ # Create the response request, which should query our vector store
+ response = openai_client.responses.create(
+ model=model,
+ input="How many experts does the Llama 4 Maverick model have?",
+ tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
+ stream=False,
+ include=["file_search_call.results"],
+ )
+
+ # Verify the file_search_tool was called
+ assert len(response.output) > 1
+ assert response.output[0].type == "file_search_call"
+ assert response.output[0].status == "completed"
+ assert response.output[0].queries # ensure it's some non-empty list
+ assert not response.output[0].results # ensure we don't get any results
+
+ # Verify some output_text was generated by the response
+ assert response.output_text
+
+
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_mcp_tool"]["test_params"]["case"],