mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-18 03:19:52 +00:00
implement embedding generation in supported inference providers
This commit is contained in:
parent
b896be2311
commit
e167e9eb93
16 changed files with 383 additions and 29 deletions
|
|
@ -18,6 +18,12 @@ def pytest_addoption(parser):
|
|||
default=None,
|
||||
help="Specify the inference model to use for testing",
|
||||
)
|
||||
parser.addoption(
|
||||
"--embedding-model",
|
||||
action="store",
|
||||
default=None,
|
||||
help="Specify the embedding model to use for testing",
|
||||
)
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
|
|
@ -78,3 +84,24 @@ def pytest_generate_tests(metafunc):
|
|||
):
|
||||
fixtures = [stack.values[0]["inference"] for stack in filtered_stacks]
|
||||
metafunc.parametrize("inference_stack", fixtures, indirect=True)
|
||||
|
||||
if "embedding_model" in metafunc.fixturenames:
|
||||
model = metafunc.config.getoption("--embedding-model")
|
||||
if not model:
|
||||
raise ValueError(
|
||||
"No embedding model specified. Please provide a valid embedding model."
|
||||
)
|
||||
params = [pytest.param(model, id="")]
|
||||
|
||||
metafunc.parametrize("embedding_model", params, indirect=True)
|
||||
|
||||
if "embedding_stack" in metafunc.fixturenames:
|
||||
fixtures = INFERENCE_FIXTURES
|
||||
if filtered_stacks := get_provider_fixture_overrides(
|
||||
metafunc.config,
|
||||
{
|
||||
"inference": INFERENCE_FIXTURES,
|
||||
},
|
||||
):
|
||||
fixtures = [stack.values[0]["inference"] for stack in filtered_stacks]
|
||||
metafunc.parametrize("embedding_stack", fixtures, indirect=True)
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ import os
|
|||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from llama_stack.apis.models import ModelInput
|
||||
|
||||
from llama_stack.apis.models import ModelInput, ModelType
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceInferenceConfig,
|
||||
)
|
||||
|
|
@ -37,6 +37,13 @@ def inference_model(request):
|
|||
return request.config.getoption("--inference-model", None)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def embedding_model(request):
    """Resolve the embedding model under test.

    An indirectly-parametrized value (``request.param``) takes precedence;
    otherwise fall back to the ``--embedding-model`` CLI option, which may
    legitimately be None.
    """
    # Indirect parametrization attaches the chosen model to request.param.
    if not hasattr(request, "param"):
        return request.config.getoption("--embedding-model", None)
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def inference_remote() -> ProviderFixture:
|
||||
return remote_stack_fixture()
|
||||
|
|
@ -85,7 +92,7 @@ def inference_ollama(inference_model) -> ProviderFixture:
|
|||
inference_model = (
|
||||
[inference_model] if isinstance(inference_model, str) else inference_model
|
||||
)
|
||||
if "Llama3.1-8B-Instruct" in inference_model:
|
||||
if inference_model and "Llama3.1-8B-Instruct" in inference_model:
|
||||
pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
|
||||
|
||||
return ProviderFixture(
|
||||
|
|
@ -240,3 +247,25 @@ async def inference_stack(request, inference_model):
|
|||
)
|
||||
|
||||
return test_stack.impls[Api.inference], test_stack.impls[Api.models]
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
async def embedding_stack(request, embedding_model):
    """Build a test stack with *embedding_model* registered as an embedding model.

    The inference provider fixture is selected by ``request.param`` (e.g.
    ``"ollama"`` resolves the ``inference_ollama`` fixture). Returns a
    ``(inference_impl, models_impl)`` tuple, mirroring ``inference_stack``.
    """
    provider = request.getfixturevalue(f"inference_{request.param}")

    # NOTE(review): get_env_or_fail returns the raw environment string for
    # EMBEDDING_DIMENSION — confirm consumers coerce it to int where needed.
    embedding_registration = ModelInput(
        model_id=embedding_model,
        model_type=ModelType.embedding_model,
        metadata={
            "embedding_dimension": get_env_or_fail("EMBEDDING_DIMENSION"),
        },
    )

    stack = await construct_stack_for_test(
        [Api.inference],
        {"inference": provider.providers},
        provider.provider_data,
        models=[embedding_registration],
    )

    return stack.impls[Api.inference], stack.impls[Api.models]
|
||||
|
|
|
|||
62
llama_stack/providers/tests/inference/test_embeddings.py
Normal file
62
llama_stack/providers/tests/inference/test_embeddings.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.inference import EmbeddingsResponse, ModelType
|
||||
|
||||
# How to run this test:
|
||||
# pytest -v -s llama_stack/providers/tests/inference/test_embeddings.py
|
||||
|
||||
|
||||
class TestEmbeddings:
    """Exercise the embeddings endpoint of an inference provider.

    Run with:
        pytest -v -s llama_stack/providers/tests/inference/test_embeddings.py
    """

    @staticmethod
    def _assert_valid_embeddings(response, expected_count=None):
        """Shared structural checks on an EmbeddingsResponse.

        Verifies the response type, that at least one embedding came back
        (exactly *expected_count* when given), and that every embedding is a
        list of floats.
        """
        assert isinstance(response, EmbeddingsResponse)
        assert len(response.embeddings) > 0
        if expected_count is not None:
            assert len(response.embeddings) == expected_count
        for embedding in response.embeddings:
            assert isinstance(embedding, list)
            for value in embedding:
                assert isinstance(value, float)

    @pytest.mark.asyncio
    async def test_embeddings(self, embedding_model, embedding_stack):
        inference_impl, models_impl = embedding_stack
        model = await models_impl.get_model(embedding_model)

        if model.model_type != ModelType.embedding_model:
            pytest.skip("This test is only applicable for embedding models")

        response = await inference_impl.embeddings(
            model_id=embedding_model,
            contents=["Hello, world!"],
        )
        self._assert_valid_embeddings(response)

    @pytest.mark.asyncio
    async def test_batch_embeddings(self, embedding_model, embedding_stack):
        inference_impl, models_impl = embedding_stack
        model = await models_impl.get_model(embedding_model)

        if model.model_type != ModelType.embedding_model:
            pytest.skip("This test is only applicable for embedding models")

        texts = ["Hello, world!", "This is a test", "Testing embeddings"]

        response = await inference_impl.embeddings(
            model_id=embedding_model,
            contents=texts,
        )
        self._assert_valid_embeddings(response, expected_count=len(texts))

        # Every embedding in one batch must share the same dimensionality.
        dimensions = {len(embedding) for embedding in response.embeddings}
        assert len(dimensions) == 1
|
||||
Loading…
Add table
Add a link
Reference in a new issue