Merge branch 'main' into issue-3443-require_approval

Matthew Farrellee 2025-09-30 12:10:13 -04:00
commit d2fdc70a8d
72 changed files with 1380 additions and 1406 deletions

View file

@@ -178,10 +178,10 @@ Note that when re-recording tests, you must use a Stack pointing to a server (i.
 ### Basic Test Pattern

 ```python
-def test_basic_completion(llama_stack_client, text_model_id):
-    response = llama_stack_client.inference.completion(
+def test_basic_chat_completion(llama_stack_client, text_model_id):
+    response = llama_stack_client.inference.chat_completion(
         model_id=text_model_id,
-        content=CompletionMessage(role="user", content="Hello"),
+        messages=[{"role": "user", "content": "Hello"}],
     )
     # Test structure, not AI output quality

View file

@@ -166,7 +166,7 @@ def model_providers(llama_stack_client):
 @pytest.fixture(autouse=True)
 def skip_if_no_model(request):
-    model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id"]
+    model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"]
     test_func = request.node.function
     actual_params = inspect.signature(test_func).parameters.keys()
@@ -274,7 +274,7 @@ def require_server(llama_stack_client):
 @pytest.fixture(scope="session")
 def openai_client(llama_stack_client, require_server):
-    base_url = f"{llama_stack_client.base_url}/v1/openai/v1"
+    base_url = f"{llama_stack_client.base_url}/v1"
     return OpenAI(base_url=base_url, api_key="fake")
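Both `openai_client` fixtures in this commit now point the stock OpenAI SDK at the stack's top-level `/v1` path rather than the old nested `/v1/openai/v1` prefix. A minimal standalone sketch of the same wiring, assuming a Llama Stack server at `http://localhost:8321` (the address is a placeholder, not taken from this diff):

```python
from openai import OpenAI

# Assumed example address; any running Llama Stack server works.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="fake")

# The stack exposes OpenAI-compatible routes directly under /v1,
# so the unmodified SDK can call it.
response = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[{"role": "user", "content": "Hello"}],
)
```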

View file

@@ -87,7 +87,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
 @pytest.fixture
 def openai_client(client_with_models):
-    base_url = f"{client_with_models.base_url}/v1/openai/v1"
+    base_url = f"{client_with_models.base_url}/v1"
     return OpenAI(base_url=base_url, api_key="fake")

View file

@@ -0,0 +1,56 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Who is the CEO of Meta?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-708",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759012142,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 24,
+          "prompt_tokens": 32,
+          "total_tokens": 56,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
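This and the four recordings that follow all share the same shape: the captured request (method, URL, body, endpoint, model) paired with the response body, tagged with the Python type it re-hydrates into on replay. A hedged sketch of reading one back, assuming a local file path; the actual record/replay harness in the repo is more involved:

```python
import json

# "recording.json" is a placeholder path for one of the files in this commit.
with open("recording.json") as f:
    rec = json.load(f)

# The stored __type__ names the OpenAI class that __data__ deserializes into.
assert rec["request"]["endpoint"] == "/v1/chat/completions"
print(rec["response"]["body"]["__type__"])
print(rec["response"]["body"]["__data__"]["choices"][0]["message"]["content"])
```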

View file

@@ -0,0 +1,56 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the currency of Japan?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-343",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The currency of Japan is the Japanese yen (, ry\u014d) and its symbol, \u00a5.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759012146,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 20,
+          "prompt_tokens": 32,
+          "total_tokens": 52,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

View file

@@ -0,0 +1,56 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the smallest country in the world?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-842",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The smallest country in the world is the Vatican City, an independent city-state located within Rome, Italy. It has a total area of approximately 0.44 km\u00b2 (0.17 sq mi) and a population of around 800 people.\n\nDespite its tiny size, the Vatican City is a sovereign state with its own government, currency, postal system, and even a small army (the Gendarmeria Romana). It's also home to numerous iconic landmarks, including St. Peter's Basilica, the Sistine Chapel, and the Vatican Museums.\n\nThe Vatican City is so small that it can fit entirely within an average American city park!",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759012145,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 133,
+          "prompt_tokens": 34,
+          "total_tokens": 167,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

View file

@@ -0,0 +1,56 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the capital of France?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-808",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The capital of France is Paris.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759012142,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 8,
+          "prompt_tokens": 32,
+          "total_tokens": 40,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

View file

@@ -0,0 +1,56 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the largest planet in our solar system?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-282",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The largest planet in our solar system is Jupiter. It is a gas giant, with a diameter of approximately 142,984 kilometers (88,846 miles). This makes it more than 11 times the diameter of the Earth and more than 2.5 times the mass of all the other planets in our solar system combined.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759012143,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 67,
+          "prompt_tokens": 35,
+          "total_tokens": 102,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}

View file

@@ -107,14 +107,34 @@ async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item
         assert "text/yaml" in str(w[0].message)


+async def test_get_raw_document_text_supports_json_mime_type():
+    """Test that the function accepts application/json mime type."""
+    json_content = '{"name": "test", "version": "1.0", "items": ["item1", "item2"]}'
+    document = Document(content=json_content, mime_type="application/json")
+
+    result = await get_raw_document_text(document)
+    assert result == json_content
+
+
+async def test_get_raw_document_text_with_json_text_content_item():
+    """Test that the function handles JSON TextContentItem correctly."""
+    json_content = '{"key": "value", "nested": {"array": [1, 2, 3]}}'
+    document = Document(content=TextContentItem(text=json_content), mime_type="application/json")
+
+    result = await get_raw_document_text(document)
+    assert result == json_content
+
+
 async def test_get_raw_document_text_rejects_unsupported_mime_types():
     """Test that the function rejects unsupported mime types."""
     document = Document(
         content="Some content",
-        mime_type="application/json",  # Not supported
+        mime_type="application/pdf",  # Not supported
     )

-    with pytest.raises(ValueError, match="Unexpected document mime type: application/json"):
+    with pytest.raises(ValueError, match="Unexpected document mime type: application/pdf"):
         await get_raw_document_text(document)

View file

@@ -42,10 +42,12 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
 from llama_stack.core.access_control.access_control import default_policy
+from llama_stack.core.datatypes import ResponsesStoreConfig
 from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
     OpenAIResponsesImpl,
 )
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
@@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic():
     # Create mock store and response store
     mock_sql_store = AsyncMock()
-    responses_store = ResponsesStore(sql_store_config=None, policy=default_policy())
+    responses_store = ResponsesStore(
+        ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy()
+    )
     responses_store.sql_store = mock_sql_store

     # Setup test data - multiple input items

View file

@@ -228,12 +228,13 @@ class TestS3FilesImpl:
         mock_now.return_value = 0

+        from llama_stack.apis.files import ExpiresAfter
+
         sample_text_file.filename = "test_expired_file"
         uploaded = await s3_provider.openai_upload_file(
             file=sample_text_file,
             purpose=OpenAIFilePurpose.ASSISTANTS,
-            expires_after_anchor="created_at",
-            expires_after_seconds=two_hours,
+            expires_after=ExpiresAfter(anchor="created_at", seconds=two_hours),
         )

         mock_now.return_value = two_hours * 2  # fast forward 4 hours
@@ -259,42 +260,44 @@ class TestS3FilesImpl:
     async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
         """Unsupported anchor value should raise ValueError."""
+        from llama_stack.apis.files import ExpiresAfter
+
         sample_text_file.filename = "test_unsupported_expires_after_anchor"

         with pytest.raises(ValueError, match="Input should be 'created_at'"):
             await s3_provider.openai_upload_file(
                 file=sample_text_file,
                 purpose=OpenAIFilePurpose.ASSISTANTS,
-                expires_after_anchor="now",
-                expires_after_seconds=3600,
+                expires_after=ExpiresAfter(anchor="now", seconds=3600),  # type: ignore
             )

     async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
         """Non-integer seconds in expires_after should raise ValueError."""
+        from llama_stack.apis.files import ExpiresAfter
+
         sample_text_file.filename = "test_nonint_expires_after_seconds"

         with pytest.raises(ValueError, match="should be a valid integer"):
             await s3_provider.openai_upload_file(
                 file=sample_text_file,
                 purpose=OpenAIFilePurpose.ASSISTANTS,
-                expires_after_anchor="created_at",
-                expires_after_seconds="many",
+                expires_after=ExpiresAfter(anchor="created_at", seconds="many"),  # type: ignore
             )

     async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
         """Seconds outside allowed range should raise ValueError."""
+        from llama_stack.apis.files import ExpiresAfter
+
         with pytest.raises(ValueError, match="greater than or equal to 3600"):
             await s3_provider.openai_upload_file(
                 file=sample_text_file,
                 purpose=OpenAIFilePurpose.ASSISTANTS,
-                expires_after_anchor="created_at",
-                expires_after_seconds=3599,
+                expires_after=ExpiresAfter(anchor="created_at", seconds=3599),
             )

         with pytest.raises(ValueError, match="less than or equal to 2592000"):
             await s3_provider.openai_upload_file(
                 file=sample_text_file,
                 purpose=OpenAIFilePurpose.ASSISTANTS,
-                expires_after_anchor="created_at",
-                expires_after_seconds=2592001,
+                expires_after=ExpiresAfter(anchor="created_at", seconds=2592001),
             )
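Taken together, these tests pin down the shape of `ExpiresAfter`: the only accepted anchor is `"created_at"`, and `seconds` must be an integer between 3600 (one hour) and 2592000 (thirty days). A minimal sketch of a pydantic model with those constraints, inferred from the asserted validation messages rather than copied from `llama_stack.apis.files`:

```python
from typing import Literal

from pydantic import BaseModel, Field


class ExpiresAfterSketch(BaseModel):
    """Illustrative stand-in for llama_stack.apis.files.ExpiresAfter."""

    anchor: Literal["created_at"]  # pydantic raises "Input should be 'created_at'" otherwise
    seconds: int = Field(ge=3600, le=2592000)  # 1 hour to 30 days
```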

View file

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

View file

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

View file

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

View file

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

View file

@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.tools import ToolDef, ToolParameter
+from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
+    convert_tooldef_to_chat_tool,
+)
+
+
+def test_convert_tooldef_to_chat_tool_preserves_items_field():
+    """Test that array parameters preserve the items field during conversion.
+
+    This test ensures that when converting ToolDef with array-type parameters
+    to OpenAI ChatCompletionToolParam format, the 'items' field is preserved.
+    Without this fix, array parameters would be missing schema information about their items.
+    """
+    tool_def = ToolDef(
+        name="test_tool",
+        description="A test tool with array parameter",
+        parameters=[
+            ToolParameter(
+                name="tags",
+                parameter_type="array",
+                description="List of tags",
+                required=True,
+                items={"type": "string"},
+            )
+        ],
+    )
+
+    result = convert_tooldef_to_chat_tool(tool_def)
+
+    assert result["type"] == "function"
+    assert result["function"]["name"] == "test_tool"
+
+    tags_param = result["function"]["parameters"]["properties"]["tags"]
+    assert tags_param["type"] == "array"
+    assert "items" in tags_param, "items field should be preserved for array parameters"
+    assert tags_param["items"] == {"type": "string"}
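For context, a hedged sketch of what the conversion under test has to do with array parameters; the names mirror the test, but this is not the actual implementation in `responses/streaming.py`:

```python
def convert_tooldef_sketch(tool_def) -> dict:
    """Illustrative ToolDef-to-OpenAI-chat-tool conversion."""
    properties: dict = {}
    for p in tool_def.parameters:
        prop = {"type": p.parameter_type, "description": p.description}
        if getattr(p, "items", None) is not None:
            # The fix under test: carry the item schema through for arrays.
            prop["items"] = p.items
        properties[p.name] = prop
    return {
        "type": "function",
        "function": {
            "name": tool_def.name,
            "description": tool_def.description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": [p.name for p in tool_def.parameters if p.required],
            },
        },
    }
```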

View file

@@ -5,13 +5,12 @@
 # the root directory of this source tree.

 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock, patch

 import numpy as np
 import pytest

 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import EmbeddingsResponse, Inference
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.datatypes import HealthStatus
@@ -70,13 +69,6 @@ def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
     return mock_vector_db


-@pytest.fixture
-def mock_inference_api(sample_embeddings):
-    mock_api = MagicMock(spec=Inference)
-    mock_api.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings))
-    return mock_api
-
-
 @pytest.fixture
 def mock_files_api():
     mock_api = MagicMock(spec=Files)
@@ -96,22 +88,6 @@ async def faiss_index(embedding_dimension):
     yield index


-@pytest.fixture
-async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter:
-    # Create the adapter
-    adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api)
-
-    # Create a mock KVStore
-    mock_kvstore = MagicMock()
-    mock_kvstore.values_in_range = AsyncMock(return_value=[])
-
-    # Patch the initialize method to avoid the kvstore_impl call
-    with patch.object(FaissVectorIOAdapter, "initialize"):
-        # Set the kvstore directly
-        adapter.kvstore = mock_kvstore
-
-        yield adapter
-
-
 async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical(
     faiss_index, sample_chunks, sample_embeddings, embedding_dimension
 ):

View file

@@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic():
         input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
         await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Test 1: First page with limit=2, descending order (default)
     result = await store.list_responses(limit=2, order=Order.desc)
     assert len(result.data) == 2
@@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending():
         input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
         await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Test ascending order pagination
     result = await store.list_responses(limit=1, order=Order.asc)
     assert len(result.data) == 1
@@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter():
         input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
         await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Test pagination with model filter
     result = await store.list_responses(limit=1, model="model-a", order=Order.desc)
     assert len(result.data) == 1
@@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit():
         input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
         await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Test without limit (should use default of 50)
     result = await store.list_responses(order=Order.desc)
     assert len(result.data) == 2
@@ -212,6 +224,9 @@ async def test_responses_store_get_response_object():
     input_list = [create_test_response_input("Test input content", "input-test-resp")]
     await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Retrieve the response
     retrieved = await store.get_response_object("test-resp")
     assert retrieved.id == "test-resp"
@@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination():
     ]
     await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Verify all items are stored correctly with explicit IDs
     all_items = await store.list_response_input_items("test-resp", order=Order.desc)
     assert len(all_items.data) == 5
@@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination():
     ]
     await store.store_response_object(response, input_list)

+    # Wait for all queued writes to complete
+    await store.flush()
+
     # Test before pagination with descending order
     # In desc order: [Fifth, Fourth, Third, Second, First]
     # before="before-3" should return [Fifth, Fourth]
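Every test in this file now calls `store.flush()` between writing and reading because, per the added comments, `ResponsesStore` queues writes for deferred completion. A hedged sketch of that write-behind pattern with a `flush()` barrier; this is illustrative, not the actual `ResponsesStore` implementation:

```python
import asyncio


class QueuedWriteStoreSketch:
    """Illustrative write-behind queue with a flush() barrier."""

    def __init__(self) -> None:
        self._queue: asyncio.Queue = asyncio.Queue()
        # Background worker drains the queue (requires a running event loop).
        self._worker = asyncio.create_task(self._drain())

    async def _drain(self) -> None:
        while True:
            item = await self._queue.get()
            try:
                await self._persist(item)  # hypothetical persistence step
            finally:
                self._queue.task_done()

    async def _persist(self, item) -> None:
        ...  # e.g., write to the backing SQL store

    async def flush(self) -> None:
        # Block until every queued write has been persisted, so reads
        # issued afterwards observe all prior writes.
        await self._queue.join()
```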