From 726bdc414d95c1c3d4bccadc5f5f8a2a6e7ac042 Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo
Date: Fri, 7 Nov 2025 15:09:11 -0500
Subject: [PATCH] use FastAPI Query class instead of custom middleware

Signed-off-by: Francisco Javier Arceo
---
 client-sdks/stainless/openapi.yml             | 31 ++++----
 docs/static/llama-stack-spec.yaml             | 31 ++++----
 docs/static/stainless-llama-stack-spec.yaml   | 31 ++++----
 .../core/server/query_params_middleware.py    | 49 ------------
 src/llama_stack/core/server/server.py         |  8 --
 .../utils/memory/openai_vector_store_mixin.py |  5 +-
 .../server/test_query_params_middleware.py    | 86 -------------------
 7 files changed, 43 insertions(+), 198 deletions(-)
 delete mode 100644 src/llama_stack/core/server/query_params_middleware.py
 delete mode 100644 tests/unit/server/test_query_params_middleware.py

diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 020f3840e..e9dae4bd0 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2691,10 +2691,6 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
+            File contents, optionally with embeddings and metadata based on query
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
@@ -2731,23 +2727,20 @@ paths:
         required: true
         schema:
           type: string
-      - name: extra_query
+      - name: include_embeddings
         in: query
         description: >-
-          Optional extra parameters to control response format. Set include_embeddings=true
-          to include embedding vectors. Set include_metadata=true to include chunk
-          metadata.
+          Whether to include embedding vectors in the response.
         required: false
         schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/bool'
+      - name: include_metadata
+        in: query
+        description: >-
+          Whether to include chunk metadata in the response.
+        required: false
+        schema:
+          $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:
@@ -10113,6 +10106,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
         Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 0888dd586..8cf1d1c89 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -2688,10 +2688,6 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
+            File contents, optionally with embeddings and metadata based on query
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
@@ -2728,23 +2724,20 @@ paths:
         required: true
         schema:
           type: string
-      - name: extra_query
+      - name: include_embeddings
         in: query
         description: >-
-          Optional extra parameters to control response format. Set include_embeddings=true
-          to include embedding vectors. Set include_metadata=true to include chunk
-          metadata.
+          Whether to include embedding vectors in the response.
         required: false
         schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/bool'
+      - name: include_metadata
+        in: query
+        description: >-
+          Whether to include chunk metadata in the response.
+        required: false
+        schema:
+          $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:
@@ -9397,6 +9390,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
         Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 020f3840e..e9dae4bd0 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -2691,10 +2691,6 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
+            File contents, optionally with embeddings and metadata based on query
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
@@ -2731,23 +2727,20 @@ paths:
         required: true
         schema:
           type: string
-      - name: extra_query
+      - name: include_embeddings
         in: query
         description: >-
-          Optional extra parameters to control response format. Set include_embeddings=true
-          to include embedding vectors. Set include_metadata=true to include chunk
-          metadata.
+          Whether to include embedding vectors in the response.
         required: false
         schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/bool'
+      - name: include_metadata
+        in: query
+        description: >-
+          Whether to include chunk metadata in the response.
+        required: false
+        schema:
+          $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:
@@ -10113,6 +10106,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
         Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
      properties:
diff --git a/src/llama_stack/core/server/query_params_middleware.py b/src/llama_stack/core/server/query_params_middleware.py
deleted file mode 100644
index 828f401fc..000000000
--- a/src/llama_stack/core/server/query_params_middleware.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -import json -import re - -from fastapi import Request -from starlette.middleware.base import BaseHTTPMiddleware - -from llama_stack.log import get_logger - -logger = get_logger(name=__name__, category="core::middleware") - -# Patterns for endpoints that need query parameter injection -QUERY_PARAM_ENDPOINTS = [ - # /vector_stores/{vector_store_id}/files/{file_id}/content - re.compile(r"/vector_stores/[^/]+/files/[^/]+/content$"), -] - - -class QueryParamsMiddleware(BaseHTTPMiddleware): - """Middleware to inject query parameters into extra_query for specific endpoints""" - - async def dispatch(self, request: Request, call_next): - # Check if this is an endpoint that needs query parameter injection - if request.method == "GET" and any(pattern.search(str(request.url.path)) for pattern in QUERY_PARAM_ENDPOINTS): - # Extract all query parameters and convert to appropriate types - extra_query = {} - query_params = dict(request.query_params) - - # Convert query parameters using JSON parsing for robust type conversion - for key, value in query_params.items(): - try: - # parse as JSON to handles booleans, numbers, strings properly - extra_query[key] = json.loads(value) - except (json.JSONDecodeError, ValueError): - # if parsing fails, keep as string - extra_query[key] = value - - if extra_query: - # Store the extra_query in request state so we can access it later - request.state.extra_query = extra_query - logger.debug(f"QueryParamsMiddleware extracted extra_query: {extra_query}") - - response = await call_next(request) - return response diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 7066aa3a0..80505c3f9 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -46,7 +46,6 @@ from llama_stack.core.request_headers import ( request_provider_data_context, user_from_scope, ) -from llama_stack.core.server.query_params_middleware import QueryParamsMiddleware from llama_stack.core.server.routes import get_all_api_routes from llama_stack.core.stack import ( Stack, @@ -264,10 +263,6 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: is_streaming = is_streaming_request(func.__name__, request, **kwargs) - # Inject extra_query from middleware if available - if hasattr(request.state, "extra_query") and request.state.extra_query: - kwargs["extra_query"] = request.state.extra_query - try: if is_streaming: context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR] @@ -407,9 +402,6 @@ def create_app() -> StackApp: if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"): app.add_middleware(ClientVersionMiddleware) - # handle extra_query for specific GET requests - app.add_middleware(QueryParamsMiddleware) - impls = app.stack.impls if config.server.auth: diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index ab376be70..3777088de 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -450,7 +450,7 @@ class OpenAIVectorStoreMixin(ABC): # Now that our vector store is created, attach any files that were provided file_ids = params.file_ids or [] tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids] - await asyncio.gather(*tasks, return_exceptions=True) + await asyncio.gather(*tasks) # Get the updated store info and return it store_info = 
self.openai_vector_stores[vector_store_id] @@ -928,6 +928,9 @@ class OpenAIVectorStoreMixin(ABC): if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) + # Parameters are already provided directly + # include_embeddings and include_metadata are now function parameters + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] diff --git a/tests/unit/server/test_query_params_middleware.py b/tests/unit/server/test_query_params_middleware.py deleted file mode 100644 index 30993576b..000000000 --- a/tests/unit/server/test_query_params_middleware.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from unittest.mock import AsyncMock, Mock - -from fastapi import Request - -from llama_stack.core.server.query_params_middleware import QueryParamsMiddleware - - -class TestQueryParamsMiddleware: - """Test cases for the QueryParamsMiddleware.""" - - async def test_extracts_query_params_for_vector_store_content(self): - """Test that middleware extracts query params for vector store content endpoints.""" - middleware = QueryParamsMiddleware(Mock()) - request = Mock(spec=Request) - request.method = "GET" - - # Mock the URL properly - mock_url = Mock() - mock_url.path = "/v1/vector_stores/vs_123/files/file_456/content" - request.url = mock_url - - request.query_params = {"include_embeddings": "true", "include_metadata": "false"} - - # Create a fresh state object without any attributes - class MockState: - pass - - request.state = MockState() - - await middleware.dispatch(request, AsyncMock()) - - assert hasattr(request.state, "extra_query") - assert request.state.extra_query == {"include_embeddings": True, "include_metadata": False} - - async def test_ignores_non_vector_store_endpoints(self): - """Test that middleware ignores non-vector store endpoints.""" - middleware = QueryParamsMiddleware(Mock()) - request = Mock(spec=Request) - request.method = "GET" - - # Mock the URL properly - mock_url = Mock() - mock_url.path = "/v1/inference/chat_completion" - request.url = mock_url - - request.query_params = {"include_embeddings": "true"} - - # Create a fresh state object without any attributes - class MockState: - pass - - request.state = MockState() - - await middleware.dispatch(request, AsyncMock()) - - assert not hasattr(request.state, "extra_query") - - async def test_handles_json_parsing(self): - """Test that middleware correctly parses JSON values and handles invalid JSON.""" - middleware = QueryParamsMiddleware(Mock()) - request = Mock(spec=Request) - request.method = "GET" - - # Mock the URL properly - mock_url = Mock() - mock_url.path = "/v1/vector_stores/vs_123/files/file_456/content" - request.url = mock_url - - request.query_params = {"config": '{"key": "value"}', "invalid": "not-json{", "number": "42"} - - # Create a fresh state object without any attributes - class MockState: - pass - - request.state = MockState() - - await middleware.dispatch(request, AsyncMock()) - - expected = {"config": {"key": "value"}, "invalid": "not-json{", "number": 42} - assert request.state.extra_query == expected
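
Note on the replacement pattern: the change above relies on FastAPI's native
query-parameter binding instead of a custom middleware. As a minimal sketch
(the handler name, defaults, and return payload are assumptions made for
illustration; only the route path and the two parameter names come from the
spec changes in this patch), the endpoint can be declared like this:

    from fastapi import FastAPI, Query

    app = FastAPI()

    @app.get("/v1/vector_stores/{vector_store_id}/files/{file_id}/content")
    async def retrieve_file_content(  # hypothetical handler name
        vector_store_id: str,
        file_id: str,
        include_embeddings: bool = Query(
            default=False,
            description="Whether to include embedding vectors in the response.",
        ),
        include_metadata: bool = Query(
            default=False,
            description="Whether to include chunk metadata in the response.",
        ),
    ) -> dict:
        # FastAPI coerces "true"/"false"/"1"/"0" to bool and rejects anything
        # else with a 422, so the JSON-parsing middleware is no longer needed.
        return {
            "vector_store_id": vector_store_id,
            "file_id": file_id,
            "include_embeddings": include_embeddings,
            "include_metadata": include_metadata,
        }

Declared this way, GET .../content?include_embeddings=true binds directly to a
typed parameter, and both flags appear in the generated OpenAPI schema, which
is what the spec diffs above reflect.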