Merged from main + fixed elasticsearch_url

2025-12-04 02:03:44 +00:00 · 2025-11-19 13:15:09 +01:00 · 2025-11-19 13:15:09 +01:00 · 7034637cac
commit 7034637cac
parent 723589ecd8 91f1b352b4
594 changed files with 79447 additions and 35172 deletions
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@ -11,9 +11,9 @@ import pytest
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError

-from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger
+from llama_stack_api import Chunk, ExpiresAfter

 from ..conftest import vector_provider_wrapper

@ -649,7 +649,7 @@ def test_openai_vector_store_attach_file(
 ):
    """Test OpenAI vector store attach file."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    compat_client = compat_client_with_empty_stores

@ -713,7 +713,7 @@ def test_openai_vector_store_attach_files_on_creation(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create some files and attach them to the vector store
    valid_file_ids = []
@ -778,7 +778,7 @@ def test_openai_vector_store_list_files(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
@ -870,7 +870,7 @@ def test_openai_vector_store_retrieve_file_contents(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
@ -911,16 +911,16 @@ def test_openai_vector_store_retrieve_file_contents(
    )

    assert file_contents is not None
-    assert len(file_contents.content) == 1
-    content = file_contents.content[0]
+    assert file_contents.object == "vector_store.file_content.page"
+    assert len(file_contents.data) == 1
+    content = file_contents.data[0]

    # llama-stack-client returns a model, openai-python is a badboy and returns a dict
    if not isinstance(content, dict):
        content = content.model_dump()
    assert content["type"] == "text"
    assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
-    assert file_contents.attributes == attributes
+    assert file_contents.has_more is False


@vector_provider_wrapper
@ -931,7 +931,7 @@ def test_openai_vector_store_delete_file(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
@ -997,7 +997,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
@ -1049,7 +1049,7 @@ def test_openai_vector_store_update_file(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
@ -1106,7 +1106,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
    This test confirms that client.vector_stores.create() creates a unique ID
    """
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

    compat_client = compat_client_with_empty_stores

@ -1487,14 +1487,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
        )

        assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
-        assert len(file_contents.content) > 0
+        assert file_contents.object == "vector_store.file_content.page"
+        assert len(file_contents.data) > 0

        # Verify the content matches what we uploaded
        content_text = (
-            file_contents.content[0].text
-            if hasattr(file_contents.content[0], "text")
-            else file_contents.content[0]["text"]
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
        )
        assert file_data[i][1].decode("utf-8") in content_text

@ -1610,3 +1608,97 @@ def test_openai_vector_store_embedding_config_from_metadata(

    assert "metadata_config_store" in store_names
    assert "consistent_config_store" in store_names
+
+
+@vector_provider_wrapper
+def test_openai_vector_store_file_contents_with_extra_query(
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    """Test that vector store file contents endpoint supports extra_query parameter."""
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+    compat_client = compat_client_with_empty_stores
+
+    # Create a vector store
+    vector_store = compat_client.vector_stores.create(
+        name="test_extra_query_store",
+        extra_body={
+            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
+        },
+    )
+
+    # Create and attach a file
+    test_content = b"This is test content for extra_query validation."
+    with BytesIO(test_content) as file_buffer:
+        file_buffer.name = "test_extra_query.txt"
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
+        )
+
+    file_attach_response = compat_client.vector_stores.files.create(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+        extra_body={"embedding_model": embedding_model_id},
+    )
+    assert file_attach_response.status == "completed"
+
+    # Wait for processing
+    time.sleep(2)
+
+    # Test that extra_query parameter is accepted and processed
+    content_with_extra_query = compat_client.vector_stores.files.content(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+        extra_query={"include_embeddings": True, "include_metadata": True},
+    )
+
+    # Test without extra_query for comparison
+    content_without_extra_query = compat_client.vector_stores.files.content(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+    )
+
+    # Validate that both calls succeed
+    assert content_with_extra_query is not None
+    assert content_without_extra_query is not None
+    assert len(content_with_extra_query.data) > 0
+    assert len(content_without_extra_query.data) > 0
+
+    # Validate that extra_query parameter is processed correctly
+    # Both should have the embedding/metadata fields available (may be None based on flags)
+    first_chunk_with_flags = content_with_extra_query.data[0]
+    first_chunk_without_flags = content_without_extra_query.data[0]
+
+    # The key validation: extra_query fields are present in the response
+    # Handle both dict and object responses (different clients may return different formats)
+    def has_field(obj, field):
+        if isinstance(obj, dict):
+            return field in obj
+        else:
+            return hasattr(obj, field)
+
+    # Validate that all expected fields are present in both responses
+    expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
+    for field in expected_fields:
+        assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
+        assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"
+
+    # Validate content is the same
+    def get_field(obj, field):
+        if isinstance(obj, dict):
+            return obj[field]
+        else:
+            return getattr(obj, field)
+
+    assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
+    assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")
+
+    with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
+    without_flags_embedding = get_field(first_chunk_without_flags, "embedding")
+
+    # Validate that embeddings are included when requested and excluded when not requested
+    assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
+    assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
+    assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False"
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@ -6,7 +6,7 @@

 import pytest

-from llama_stack.apis.vector_io import Chunk
+from llama_stack_api import Chunk

 from ..conftest import vector_provider_wrapper