Update the "InterleavedTextMedia" type (#635)

## What does this PR do?

This is a long-pending change and particularly important to get done
now.

Specifically:
- we cannot "localize" (aka download) any URLs from media attachments
anywhere near our modeling code. it must be done within llama-stack.
- `PIL.Image` is infesting all our APIs via `ImageMedia ->
InterleavedTextMedia` and that cannot be right at all. Anything in the
API surface must be "naturally serializable". We need a standard `{
type: "image", image_url: "<...>" }` which is more extensible
- `UserMessage`, `SystemMessage`, etc. are moved completely to
llama-stack from the llama-models repository.

See https://github.com/meta-llama/llama-models/pull/244 for the
corresponding PR in llama-models.

## Test Plan

```bash
cd llama_stack/providers/tests

pytest -s -v -k "fireworks or ollama or together" inference/test_vision_inference.py
pytest -s -v -k "(fireworks or ollama or together) and llama_3b" inference/test_text_inference.py
pytest -s -v -k chroma memory/test_memory.py \
  --env EMBEDDING_DIMENSION=384 --env CHROMA_DB_PATH=/tmp/foobar

pytest -s -v -k fireworks agents/test_agents.py  \
   --safety-shield=meta-llama/Llama-Guard-3-8B \
   --inference-model=meta-llama/Llama-3.1-8B-Instruct
```

Updated the client sdk (see PR ...), installed the SDK in the same
environment and then ran the SDK tests:

```bash
cd tests/client-sdk
LLAMA_STACK_CONFIG=together pytest -s -v agents/test_agents.py
LLAMA_STACK_CONFIG=ollama pytest -s -v memory/test_memory.py

# this one needed a bit of hacking in the run.yaml to ensure I could register the vision model correctly
INFERENCE_MODEL=llama3.2-vision:latest LLAMA_STACK_CONFIG=ollama pytest -s -v inference/test_inference.py
```
This commit is contained in:
Ashwin Bharambe 2024-12-17 11:18:31 -08:00 committed by GitHub
parent 10eb31badf
commit 8de8eb03c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 1344 additions and 1801 deletions

View file

@ -15,23 +15,23 @@ from .fixtures import MEMORY_FIXTURES
DEFAULT_PROVIDER_COMBINATIONS = [
pytest.param(
{
"inference": "meta_reference",
"inference": "sentence_transformers",
"memory": "faiss",
},
id="meta_reference",
marks=pytest.mark.meta_reference,
id="sentence_transformers",
marks=pytest.mark.sentence_transformers,
),
pytest.param(
{
"inference": "ollama",
"memory": "pgvector",
"memory": "faiss",
},
id="ollama",
marks=pytest.mark.ollama,
),
pytest.param(
{
"inference": "together",
"inference": "sentence_transformers",
"memory": "chroma",
},
id="chroma",
@ -58,10 +58,10 @@ DEFAULT_PROVIDER_COMBINATIONS = [
def pytest_addoption(parser):
parser.addoption(
"--inference-model",
"--embedding-model",
action="store",
default=None,
help="Specify the inference model to use for testing",
help="Specify the embedding model to use for testing",
)
@ -74,15 +74,15 @@ def pytest_configure(config):
def pytest_generate_tests(metafunc):
if "inference_model" in metafunc.fixturenames:
model = metafunc.config.getoption("--inference-model")
if not model:
raise ValueError(
"No inference model specified. Please provide a valid inference model."
)
params = [pytest.param(model, id="")]
if "embedding_model" in metafunc.fixturenames:
model = metafunc.config.getoption("--embedding-model")
if model:
params = [pytest.param(model, id="")]
else:
params = [pytest.param("all-MiniLM-L6-v2", id="")]
metafunc.parametrize("embedding_model", params, indirect=True)
metafunc.parametrize("inference_model", params, indirect=True)
if "memory_stack" in metafunc.fixturenames:
available_fixtures = {
"inference": INFERENCE_FIXTURES,

View file

@ -24,6 +24,13 @@ from ..conftest import ProviderFixture, remote_stack_fixture
from ..env import get_env_or_fail
@pytest.fixture(scope="session")
def embedding_model(request):
if hasattr(request, "param"):
return request.param
return request.config.getoption("--embedding-model", None)
@pytest.fixture(scope="session")
def memory_remote() -> ProviderFixture:
return remote_stack_fixture()
@ -107,7 +114,7 @@ MEMORY_FIXTURES = ["faiss", "pgvector", "weaviate", "remote", "chroma"]
@pytest_asyncio.fixture(scope="session")
async def memory_stack(inference_model, request):
async def memory_stack(embedding_model, request):
fixture_dict = request.param
providers = {}
@ -124,7 +131,7 @@ async def memory_stack(inference_model, request):
provider_data,
models=[
ModelInput(
model_id=inference_model,
model_id=embedding_model,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": get_env_or_fail("EMBEDDING_DIMENSION"),

View file

@ -46,13 +46,13 @@ def sample_documents():
async def register_memory_bank(
banks_impl: MemoryBanks, inference_model: str
banks_impl: MemoryBanks, embedding_model: str
) -> MemoryBank:
bank_id = f"test_bank_{uuid.uuid4().hex}"
return await banks_impl.register_memory_bank(
memory_bank_id=bank_id,
params=VectorMemoryBankParams(
embedding_model=inference_model,
embedding_model=embedding_model,
chunk_size_in_tokens=512,
overlap_size_in_tokens=64,
),
@ -61,11 +61,11 @@ async def register_memory_bank(
class TestMemory:
@pytest.mark.asyncio
async def test_banks_list(self, memory_stack, inference_model):
async def test_banks_list(self, memory_stack, embedding_model):
_, banks_impl = memory_stack
# Register a test bank
registered_bank = await register_memory_bank(banks_impl, inference_model)
registered_bank = await register_memory_bank(banks_impl, embedding_model)
try:
# Verify our bank shows up in list
@ -86,7 +86,7 @@ class TestMemory:
)
@pytest.mark.asyncio
async def test_banks_register(self, memory_stack, inference_model):
async def test_banks_register(self, memory_stack, embedding_model):
_, banks_impl = memory_stack
bank_id = f"test_bank_{uuid.uuid4().hex}"
@ -96,7 +96,7 @@ class TestMemory:
await banks_impl.register_memory_bank(
memory_bank_id=bank_id,
params=VectorMemoryBankParams(
embedding_model=inference_model,
embedding_model=embedding_model,
chunk_size_in_tokens=512,
overlap_size_in_tokens=64,
),
@ -111,7 +111,7 @@ class TestMemory:
await banks_impl.register_memory_bank(
memory_bank_id=bank_id,
params=VectorMemoryBankParams(
embedding_model=inference_model,
embedding_model=embedding_model,
chunk_size_in_tokens=512,
overlap_size_in_tokens=64,
),
@ -129,14 +129,14 @@ class TestMemory:
@pytest.mark.asyncio
async def test_query_documents(
self, memory_stack, inference_model, sample_documents
self, memory_stack, embedding_model, sample_documents
):
memory_impl, banks_impl = memory_stack
with pytest.raises(ValueError):
await memory_impl.insert_documents("test_bank", sample_documents)
registered_bank = await register_memory_bank(banks_impl, inference_model)
registered_bank = await register_memory_bank(banks_impl, embedding_model)
await memory_impl.insert_documents(
registered_bank.memory_bank_id, sample_documents
)