Merge branch 'main' into inference_refactor

Botao Chen 2024-12-17 20:10:23 -08:00
commit fadb7deae5
79 changed files with 1547 additions and 2026 deletions


@@ -81,13 +81,13 @@ def pytest_addoption(parser):
     parser.addoption(
         "--inference-model",
         action="store",
-        default="meta-llama/Llama-3.1-8B-Instruct",
+        default="meta-llama/Llama-3.2-3B-Instruct",
         help="Specify the inference model to use for testing",
     )
     parser.addoption(
         "--safety-shield",
         action="store",
-        default="meta-llama/Llama-Guard-3-8B",
+        default="meta-llama/Llama-Guard-3-1B",
         help="Specify the safety shield to use for testing",
     )


@@ -9,7 +9,7 @@ import tempfile
 import pytest
 import pytest_asyncio
 
-from llama_stack.apis.models import ModelInput
+from llama_stack.apis.models import ModelInput, ModelType
 from llama_stack.distribution.datatypes import Api, Provider
 from llama_stack.providers.inline.agents.meta_reference import (
@@ -67,22 +67,42 @@ async def agents_stack(request, inference_model, safety_shield):
     for key in ["inference", "safety", "memory", "agents"]:
         fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
         providers[key] = fixture.providers
+        if key == "inference":
+            providers[key].append(
+                Provider(
+                    provider_id="agents_memory_provider",
+                    provider_type="inline::sentence-transformers",
+                    config={},
+                )
+            )
         if fixture.provider_data:
             provider_data.update(fixture.provider_data)
 
     inference_models = (
         inference_model if isinstance(inference_model, list) else [inference_model]
     )
+    models = [
+        ModelInput(
+            model_id=model,
+            model_type=ModelType.llm,
+            provider_id=providers["inference"][0].provider_id,
+        )
+        for model in inference_models
+    ]
+    models.append(
+        ModelInput(
+            model_id="all-MiniLM-L6-v2",
+            model_type=ModelType.embedding,
+            provider_id="agents_memory_provider",
+            metadata={"embedding_dimension": 384},
+        )
+    )
+
     test_stack = await construct_stack_for_test(
         [Api.agents, Api.inference, Api.safety, Api.memory],
         providers,
         provider_data,
-        models=[
-            ModelInput(
-                model_id=model,
-            )
-            for model in inference_models
-        ],
+        models=models,
         shields=[safety_shield] if safety_shield else [],
     )
 
     return test_stack
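
Note: the Provider appended above and the embedding ModelInput are tied together by provider_id. A minimal sketch of the invariant this hunk sets up (values taken from the diff itself; all-MiniLM-L6-v2 does produce 384-dimensional embeddings):

from llama_stack.apis.models import ModelInput, ModelType

embedding = ModelInput(
    model_id="all-MiniLM-L6-v2",
    model_type=ModelType.embedding,
    provider_id="agents_memory_provider",   # must match the appended Provider's id
    metadata={"embedding_dimension": 384},  # all-MiniLM-L6-v2 output size
)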


@@ -113,6 +113,7 @@ def inference_vllm_remote() -> ProviderFixture:
                 provider_type="remote::vllm",
                 config=VLLMInferenceAdapterConfig(
                     url=get_env_or_fail("VLLM_URL"),
+                    max_tokens=int(os.getenv("VLLM_MAX_TOKENS", 2048)),
                 ).model_dump(),
             )
         ],
@@ -192,6 +193,19 @@ def inference_tgi() -> ProviderFixture:
     )
 
 
+@pytest.fixture(scope="session")
+def inference_sentence_transformers() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="sentence_transformers",
+                provider_type="inline::sentence-transformers",
+                config={},
+            )
+        ]
+    )
+
+
 def get_model_short_name(model_name: str) -> str:
     """Convert model name to a short test identifier.

@@ -7,16 +7,19 @@
 from pathlib import Path
 
 import pytest
 from PIL import Image as PIL_Image
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
 
 from .utils import group_chunks
 
 THIS_DIR = Path(__file__).parent
 
+with open(THIS_DIR / "pasta.jpeg", "rb") as f:
+    PASTA_IMAGE = f.read()
+
 
 class TestVisionModelInference:
     @pytest.mark.asyncio
@@ -24,12 +27,12 @@ class TestVisionModelInference:
         "image, expected_strings",
         [
             (
-                ImageMedia(image=PIL_Image.open(THIS_DIR / "pasta.jpeg")),
+                ImageContentItem(data=PASTA_IMAGE),
                 ["spaghetti"],
             ),
             (
-                ImageMedia(
-                    image=URL(
+                ImageContentItem(
+                    url=URL(
                         uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
                     )
                 ),
@@ -58,7 +61,12 @@ class TestVisionModelInference:
             model_id=inference_model,
             messages=[
                 UserMessage(content="You are a helpful assistant."),
-                UserMessage(content=[image, "Describe this image in two sentences."]),
+                UserMessage(
+                    content=[
+                        image,
+                        TextContentItem(text="Describe this image in two sentences."),
+                    ]
+                ),
             ],
             stream=False,
             sampling_params=SamplingParams(max_tokens=100),
@@ -89,8 +97,8 @@ class TestVisionModelInference:
         )
 
         images = [
-            ImageMedia(
-                image=URL(
+            ImageContentItem(
+                url=URL(
                     uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
                 )
             ),
@@ -106,7 +114,12 @@ class TestVisionModelInference:
             messages=[
                 UserMessage(content="You are a helpful assistant."),
                 UserMessage(
-                    content=[image, "Describe this image in two sentences."]
+                    content=[
+                        image,
+                        TextContentItem(
+                            text="Describe this image in two sentences."
+                        ),
+                    ]
                 ),
             ],
             stream=True,
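
Note: the reworked vision tests exercise both ways of building an image item under the new content_types API. A standalone sketch (the example URL and file path here are placeholders, not from the diff):

from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL

# Inline raw bytes, as with PASTA_IMAGE above
with open("pasta.jpeg", "rb") as f:
    local_image = ImageContentItem(data=f.read())

# Reference by URL instead of embedding the bytes
remote_image = ImageContentItem(url=URL(uri="https://example.com/dog.jpg"))

# Bare strings in mixed content become explicit TextContentItem entries
content = [local_image, TextContentItem(text="Describe this image in two sentences.")]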


@@ -15,23 +15,23 @@ from .fixtures import MEMORY_FIXTURES
 DEFAULT_PROVIDER_COMBINATIONS = [
     pytest.param(
         {
-            "inference": "meta_reference",
+            "inference": "sentence_transformers",
             "memory": "faiss",
         },
-        id="meta_reference",
-        marks=pytest.mark.meta_reference,
+        id="sentence_transformers",
+        marks=pytest.mark.sentence_transformers,
     ),
     pytest.param(
         {
             "inference": "ollama",
-            "memory": "pgvector",
+            "memory": "faiss",
         },
         id="ollama",
         marks=pytest.mark.ollama,
     ),
     pytest.param(
         {
-            "inference": "together",
+            "inference": "sentence_transformers",
             "memory": "chroma",
         },
         id="chroma",
@@ -58,10 +58,10 @@ DEFAULT_PROVIDER_COMBINATIONS = [
 def pytest_addoption(parser):
     parser.addoption(
-        "--inference-model",
+        "--embedding-model",
         action="store",
         default=None,
-        help="Specify the inference model to use for testing",
+        help="Specify the embedding model to use for testing",
     )
@@ -74,15 +74,15 @@ def pytest_configure(config):
 def pytest_generate_tests(metafunc):
-    if "inference_model" in metafunc.fixturenames:
-        model = metafunc.config.getoption("--inference-model")
-        if not model:
-            raise ValueError(
-                "No inference model specified. Please provide a valid inference model."
-            )
-        params = [pytest.param(model, id="")]
-
-        metafunc.parametrize("inference_model", params, indirect=True)
+    if "embedding_model" in metafunc.fixturenames:
+        model = metafunc.config.getoption("--embedding-model")
+        if model:
+            params = [pytest.param(model, id="")]
+        else:
+            params = [pytest.param("all-MiniLM-L6-v2", id="")]
+
+        metafunc.parametrize("embedding_model", params, indirect=True)
     if "memory_stack" in metafunc.fixturenames:
         available_fixtures = {
             "inference": INFERENCE_FIXTURES,


@@ -24,6 +24,13 @@ from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
 
 
+@pytest.fixture(scope="session")
+def embedding_model(request):
+    if hasattr(request, "param"):
+        return request.param
+    return request.config.getoption("--embedding-model", None)
+
+
 @pytest.fixture(scope="session")
 def memory_remote() -> ProviderFixture:
     return remote_stack_fixture()
@@ -107,7 +114,7 @@ MEMORY_FIXTURES = ["faiss", "pgvector", "weaviate", "remote", "chroma"]
 @pytest_asyncio.fixture(scope="session")
-async def memory_stack(inference_model, request):
+async def memory_stack(embedding_model, request):
     fixture_dict = request.param
 
     providers = {}
@@ -124,7 +131,7 @@ async def memory_stack(inference_model, request):
         provider_data,
         models=[
             ModelInput(
-                model_id=inference_model,
+                model_id=embedding_model,
                 model_type=ModelType.embedding,
                 metadata={
                     "embedding_dimension": get_env_or_fail("EMBEDDING_DIMENSION"),


@@ -46,13 +46,13 @@ def sample_documents():
 async def register_memory_bank(
-    banks_impl: MemoryBanks, inference_model: str
+    banks_impl: MemoryBanks, embedding_model: str
 ) -> MemoryBank:
     bank_id = f"test_bank_{uuid.uuid4().hex}"
     return await banks_impl.register_memory_bank(
         memory_bank_id=bank_id,
         params=VectorMemoryBankParams(
-            embedding_model=inference_model,
+            embedding_model=embedding_model,
             chunk_size_in_tokens=512,
             overlap_size_in_tokens=64,
         ),
@@ -61,11 +61,11 @@ async def register_memory_bank(
 class TestMemory:
     @pytest.mark.asyncio
-    async def test_banks_list(self, memory_stack, inference_model):
+    async def test_banks_list(self, memory_stack, embedding_model):
         _, banks_impl = memory_stack
 
         # Register a test bank
-        registered_bank = await register_memory_bank(banks_impl, inference_model)
+        registered_bank = await register_memory_bank(banks_impl, embedding_model)
 
         try:
             # Verify our bank shows up in list
@@ -86,7 +86,7 @@ class TestMemory:
         )
 
     @pytest.mark.asyncio
-    async def test_banks_register(self, memory_stack, inference_model):
+    async def test_banks_register(self, memory_stack, embedding_model):
         _, banks_impl = memory_stack
 
         bank_id = f"test_bank_{uuid.uuid4().hex}"
@@ -96,7 +96,7 @@ class TestMemory:
             await banks_impl.register_memory_bank(
                 memory_bank_id=bank_id,
                 params=VectorMemoryBankParams(
-                    embedding_model=inference_model,
+                    embedding_model=embedding_model,
                     chunk_size_in_tokens=512,
                     overlap_size_in_tokens=64,
                 ),
@@ -111,7 +111,7 @@ class TestMemory:
             await banks_impl.register_memory_bank(
                 memory_bank_id=bank_id,
                 params=VectorMemoryBankParams(
-                    embedding_model=inference_model,
+                    embedding_model=embedding_model,
                     chunk_size_in_tokens=512,
                     overlap_size_in_tokens=64,
                 ),
@@ -129,14 +129,14 @@ class TestMemory:
     @pytest.mark.asyncio
     async def test_query_documents(
-        self, memory_stack, inference_model, sample_documents
+        self, memory_stack, embedding_model, sample_documents
     ):
         memory_impl, banks_impl = memory_stack
 
         with pytest.raises(ValueError):
             await memory_impl.insert_documents("test_bank", sample_documents)
 
-        registered_bank = await register_memory_bank(banks_impl, inference_model)
+        registered_bank = await register_memory_bank(banks_impl, embedding_model)
         await memory_impl.insert_documents(
             registered_bank.memory_bank_id, sample_documents
         )


@@ -7,8 +7,8 @@
 import pytest
 import pytest_asyncio
 
-from llama_models.llama3.api.datatypes import URL
 from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.datasets import DatasetInput
 from llama_stack.apis.models import ModelInput


@@ -74,7 +74,9 @@ def pytest_addoption(parser):
 
 SAFETY_SHIELD_PARAMS = [
-    pytest.param("Llama-Guard-3-1B", marks=pytest.mark.guard_1b, id="guard_1b"),
+    pytest.param(
+        "meta-llama/Llama-Guard-3-1B", marks=pytest.mark.guard_1b, id="guard_1b"
+    ),
 ]
@@ -86,6 +88,7 @@ def pytest_generate_tests(metafunc):
     if "safety_shield" in metafunc.fixturenames:
         shield_id = metafunc.config.getoption("--safety-shield")
         if shield_id:
+            assert shield_id.startswith("meta-llama/")
             params = [pytest.param(shield_id, id="")]
         else:
             params = SAFETY_SHIELD_PARAMS
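
Note: shield IDs are now expected to carry the meta-llama/ namespace; the assertion above rejects bare names at collection time. Illustration:

assert "meta-llama/Llama-Guard-3-1B".startswith("meta-llama/")  # accepted
# A bare "Llama-Guard-3-1B" would trip the assertion and fail collection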


@@ -10,6 +10,7 @@ from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.safety import *  # noqa: F403
 from llama_stack.distribution.datatypes import *  # noqa: F403
+from llama_stack.apis.inference import UserMessage
 
 # How to run this test:
 #