Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-04 10:10:36 +00:00)

Merge remote-tracking branch 'upstream/main' into api-pkg

Signed-off-by: Charlie Doern <cdoern@redhat.com>

Commit d6b915ce0a
48 changed files with 1990 additions and 425 deletions
@@ -1,6 +1,7 @@
 {
     "default": [
         {"suite": "base", "setup": "ollama"},
+        {"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"},
         {"suite": "vision", "setup": "ollama-vision"},
         {"suite": "responses", "setup": "gpt"},
         {"suite": "base-vllm-subset", "setup": "vllm"}
@@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session):
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

     # Handle server:<config_name> format or server:<config_name>:<port>
+    # Also handles server:<distro>::<run_file.yaml> format
     if config.startswith("server:"):
-        parts = config.split(":")
-        config_name = parts[1]
-        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        # Strip the "server:" prefix first
+        config_part = config[7:]  # len("server:") == 7
+
+        # Check for :: (distro::runfile format)
+        if "::" in config_part:
+            config_name = config_part
+            port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        else:
+            # Single colon format: either <name> or <name>:<port>
+            parts = config_part.split(":")
+            config_name = parts[0]
+            port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+
         base_url = f"http://localhost:{port}"

         force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
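For reference, a minimal standalone sketch of the parsing behaviour above, showing the three accepted --stack-config forms (the 8321 fallback port is an assumption for illustration; the real code uses DEFAULT_PORT and the LLAMA_STACK_PORT env var):

import os

DEFAULT_PORT = 8321  # assumed default, for illustration only


def parse_server_config(config: str) -> tuple[str, int]:
    """Return (config_name, port) for a "server:..." test config string."""
    config_part = config[len("server:"):]
    if "::" in config_part:
        # distro::run_file.yaml form: the whole remainder is the config name
        return config_part, int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
    parts = config_part.split(":")
    port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
    return parts[0], port


# parse_server_config("server:ci-tests")                                -> ("ci-tests", 8321)
# parse_server_config("server:ci-tests:8322")                           -> ("ci-tests", 8322)
# parse_server_config("server:ci-tests::run-with-postgres-store.yaml")  -> ("ci-tests::run-with-postgres-store.yaml", 8321)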
@@ -323,7 +334,13 @@ def require_server(llama_stack_client):
 @pytest.fixture(scope="session")
 def openai_client(llama_stack_client, require_server):
     base_url = f"{llama_stack_client.base_url}/v1"
-    return OpenAI(base_url=base_url, api_key="fake")
+    client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0)
+    yield client
+    # Cleanup: close HTTP connections
+    try:
+        client.close()
+    except Exception:
+        pass


 @pytest.fixture(params=["openai_client", "client_with_models"])
tests/integration/recordings/README.md (generated, 4 changes)
@@ -2,6 +2,10 @@

 This directory contains recorded inference API responses used for deterministic testing without requiring live API access.

+For more information, see the
+[docs](https://llamastack.github.io/docs/contributing/testing/record-replay).
+This README provides more technical information.
+
 ## Structure

 - `responses/` - JSON files containing request/response pairs for inference operations
@@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider):
         client = LlamaStackAsLibraryClient(config, skip_logger_removal=True)
         return client

-    return OpenAI(
+    client = OpenAI(
         base_url=base_url,
         api_key=api_key,
+        max_retries=0,
+        timeout=30.0,
     )
+    yield client
+    # Cleanup: close HTTP connections
+    try:
+        client.close()
+    except Exception:
+        pass
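Both openai_client fixture changes above follow the same pattern: return becomes yield so pytest runs teardown after the session and the client's pooled HTTP connections are closed explicitly, while max_retries=0 and timeout=30.0 keep a hung server from stalling the whole suite. A self-contained sketch of that yield-with-teardown pattern (the fake client class below is a stand-in for OpenAI(...), not from this diff):

import pytest


class _FakeClient:
    """Stand-in for OpenAI(...) so the sketch is self-contained."""

    def close(self) -> None:
        pass


@pytest.fixture(scope="session")
def example_client():
    client = _FakeClient()
    yield client        # tests run while the fixture is suspended here
    try:
        client.close()  # teardown: release pooled HTTP connections
    except Exception:
        pass


def test_uses_example_client(example_client):
    assert example_client is not None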
@@ -65,8 +65,14 @@ class TestConversationResponses:
         conversation_items = openai_client.conversations.items.list(conversation.id)
         assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages

+    @pytest.mark.timeout(60, method="thread")
     def test_conversation_context_loading(self, openai_client, text_model_id):
-        """Test that conversation context is properly loaded for responses."""
+        """Test that conversation context is properly loaded for responses.
+
+        Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
+        after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
+        Investigation needed: connection pool exhaustion or event loop state issue.
+        """
         conversation = openai_client.conversations.create(
             items=[
                 {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},
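The timeout marker added above comes from the pytest-timeout plugin; method="thread" uses a watchdog thread rather than SIGALRM, so it still fires when the test is stuck in a blocking call, as the new docstring describes. A minimal usage sketch (assuming pytest-timeout is installed; the test body is illustrative):

import time

import pytest


@pytest.mark.timeout(60, method="thread")  # watchdog thread aborts the run if this exceeds 60s
def test_finishes_quickly():
    time.sleep(1)  # a real test would issue its HTTP requests here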
@@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "ollama/nomic-embed-text:v1.5",
         },
     ),
+    "ollama-postgres": Setup(
+        name="ollama-postgres",
+        description="Server-mode tests with Postgres-backed persistence",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "SAFETY_MODEL": "ollama/llama-guard3:1b",
+            "POSTGRES_HOST": "127.0.0.1",
+            "POSTGRES_PORT": "5432",
+            "POSTGRES_DB": "llamastack",
+            "POSTGRES_USER": "llamastack",
+            "POSTGRES_PASSWORD": "llamastack",
+            "LLAMA_STACK_LOGGING": "openai_responses=info",
+        },
+        defaults={
+            "text_model": "ollama/llama3.2:3b-instruct-fp16",
+            "embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
+            "safety_model": "ollama/llama-guard3:1b",
+            "safety_shield": "llama-guard",
+        },
+    ),
     "vllm": Setup(
         name="vllm",
         description="vLLM provider with a text model",
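The ollama-postgres setup only exports environment variables; the server started from run-with-postgres-store.yaml is expected to read them. As a hypothetical illustration (the helper below is not from this commit), variables like these typically assemble into a standard libpq-style connection URL:

import os


def postgres_url_from_env() -> str:
    # Mirrors the env block of the ollama-postgres Setup above.
    host = os.environ.get("POSTGRES_HOST", "127.0.0.1")
    port = os.environ.get("POSTGRES_PORT", "5432")
    db = os.environ.get("POSTGRES_DB", "llamastack")
    user = os.environ.get("POSTGRES_USER", "llamastack")
    password = os.environ.get("POSTGRES_PASSWORD", "llamastack")
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"


# -> postgresql://llamastack:llamastack@127.0.0.1:5432/llamastack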
@@ -8,6 +8,7 @@ import time
 from io import BytesIO

 import pytest
+from llama_stack_api.apis.files import ExpiresAfter
 from llama_stack_api.apis.vector_io import Chunk
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
@@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata(

     assert "metadata_config_store" in store_names
     assert "consistent_config_store" in store_names
+
+
+@vector_provider_wrapper
+def test_openai_vector_store_file_contents_with_extra_query(
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    """Test that vector store file contents endpoint supports extra_query parameter."""
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+    compat_client = compat_client_with_empty_stores
+
+    # Create a vector store
+    vector_store = compat_client.vector_stores.create(
+        name="test_extra_query_store",
+        extra_body={
+            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
+        },
+    )
+
+    # Create and attach a file
+    test_content = b"This is test content for extra_query validation."
+    with BytesIO(test_content) as file_buffer:
+        file_buffer.name = "test_extra_query.txt"
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
+        )
+
+    file_attach_response = compat_client.vector_stores.files.create(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+        extra_body={"embedding_model": embedding_model_id},
+    )
+    assert file_attach_response.status == "completed"
+
+    # Wait for processing
+    time.sleep(2)
+
+    # Test that extra_query parameter is accepted and processed
+    content_with_extra_query = compat_client.vector_stores.files.content(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+        extra_query={"include_embeddings": True, "include_metadata": True},
+    )
+
+    # Test without extra_query for comparison
+    content_without_extra_query = compat_client.vector_stores.files.content(
+        vector_store_id=vector_store.id,
+        file_id=file.id,
+    )
+
+    # Validate that both calls succeed
+    assert content_with_extra_query is not None
+    assert content_without_extra_query is not None
+    assert len(content_with_extra_query.data) > 0
+    assert len(content_without_extra_query.data) > 0
+
+    # Validate that extra_query parameter is processed correctly
+    # Both should have the embedding/metadata fields available (may be None based on flags)
+    first_chunk_with_flags = content_with_extra_query.data[0]
+    first_chunk_without_flags = content_without_extra_query.data[0]
+
+    # The key validation: extra_query fields are present in the response
+    # Handle both dict and object responses (different clients may return different formats)
+    def has_field(obj, field):
+        if isinstance(obj, dict):
+            return field in obj
+        else:
+            return hasattr(obj, field)
+
+    # Validate that all expected fields are present in both responses
+    expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
+    for field in expected_fields:
+        assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
+        assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"
+
+    # Validate content is the same
+    def get_field(obj, field):
+        if isinstance(obj, dict):
+            return obj[field]
+        else:
+            return getattr(obj, field)
+
+    assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
+    assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")
+
+    with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
+    without_flags_embedding = get_field(first_chunk_without_flags, "embedding")
+
+    # Validate that embeddings are included when requested and excluded when not requested
+    assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
+    assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
+    assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False"
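The new test leans on the OpenAI Python client's generic pass-through arguments: extra_body adds fields to the JSON request body and extra_query adds URL query parameters that the generated SDK does not model, which is how the Llama Stack-specific embedding_model, include_embeddings and include_metadata flags reach the server. A condensed sketch of the same call (base URL, API key and IDs are placeholders for illustration):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="fake")  # assumed local server

contents = client.vector_stores.files.content(
    vector_store_id="vs_123",   # placeholder IDs
    file_id="file-abc123",
    extra_query={"include_embeddings": True, "include_metadata": True},
)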