Merge branch 'main' into aidand-391-guided-decoding-vllm_3

Aidan Do 2024-12-08 17:36:57 +11:00 committed by GitHub
commit 44bb23ebc8
177 changed files with 5725 additions and 594 deletions


@@ -81,6 +81,18 @@ class TestDatasetIO:
         assert len(response) == 1
         assert response[0].identifier == "test_dataset"
 
+        with pytest.raises(Exception) as exc_info:
+            # unregister a dataset that does not exist
+            await datasets_impl.unregister_dataset("test_dataset2")
+
+        await datasets_impl.unregister_dataset("test_dataset")
+        response = await datasets_impl.list_datasets()
+        assert isinstance(response, list)
+        assert len(response) == 0
+
+        with pytest.raises(Exception) as exc_info:
+            await datasets_impl.unregister_dataset("test_dataset")
+
     @pytest.mark.asyncio
     async def test_get_rows_paginated(self, datasetio_stack):
         datasetio_impl, datasets_impl = datasetio_stack
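Reviewer note: the added block exercises the full unregister lifecycle — unregistering an unknown dataset raises, a registered dataset can be removed and disappears from list_datasets(), and a second unregister on the same identifier raises again. A minimal in-memory sketch of the contract these assertions assume (illustrative names, not the actual datasetio provider):

class InMemoryDatasets:
    """Illustrative stand-in for a datasets provider, not the real implementation."""

    def __init__(self):
        self._datasets = {}

    async def register_dataset(self, identifier: str, **kwargs):
        self._datasets[identifier] = kwargs

    async def list_datasets(self):
        return list(self._datasets)

    async def unregister_dataset(self, identifier: str):
        # Unknown identifiers raise, which is what pytest.raises(Exception) checks above.
        if identifier not in self._datasets:
            raise ValueError(f"Dataset {identifier} is not registered")
        del self._datasets[identifier]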


@@ -6,10 +6,14 @@
 import pytest
 
+from ..agents.fixtures import AGENTS_FIXTURES
 from ..conftest import get_provider_fixture_overrides
 from ..datasetio.fixtures import DATASETIO_FIXTURES
 from ..inference.fixtures import INFERENCE_FIXTURES
+from ..memory.fixtures import MEMORY_FIXTURES
+from ..safety.fixtures import SAFETY_FIXTURES
 from ..scoring.fixtures import SCORING_FIXTURES
 from .fixtures import EVAL_FIXTURES
@@ -20,6 +24,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
             "scoring": "basic",
             "datasetio": "localfs",
             "inference": "fireworks",
+            "agents": "meta_reference",
+            "safety": "llama_guard",
+            "memory": "faiss",
         },
         id="meta_reference_eval_fireworks_inference",
         marks=pytest.mark.meta_reference_eval_fireworks_inference,

@@ -30,6 +37,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
             "scoring": "basic",
             "datasetio": "localfs",
             "inference": "together",
+            "agents": "meta_reference",
+            "safety": "llama_guard",
+            "memory": "faiss",
         },
         id="meta_reference_eval_together_inference",
         marks=pytest.mark.meta_reference_eval_together_inference,

@@ -40,6 +50,9 @@ DEFAULT_PROVIDER_COMBINATIONS = [
             "scoring": "basic",
             "datasetio": "huggingface",
             "inference": "together",
+            "agents": "meta_reference",
+            "safety": "llama_guard",
+            "memory": "faiss",
         },
         id="meta_reference_eval_together_inference_huggingface_datasetio",
         marks=pytest.mark.meta_reference_eval_together_inference_huggingface_datasetio,

@@ -75,6 +88,9 @@ def pytest_generate_tests(metafunc):
             "scoring": SCORING_FIXTURES,
             "datasetio": DATASETIO_FIXTURES,
             "inference": INFERENCE_FIXTURES,
+            "agents": AGENTS_FIXTURES,
+            "safety": SAFETY_FIXTURES,
+            "memory": MEMORY_FIXTURES,
         }
         combinations = (
             get_provider_fixture_overrides(metafunc.config, available_fixtures)
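Reviewer note: each DEFAULT_PROVIDER_COMBINATIONS entry above is the dict inside a pytest.param(...), and pytest_generate_tests feeds it to the eval_stack fixture, so every key listed in available_fixtures must also appear in each combination. Roughly the shape of one entry after this change (a sketch; the "eval" key is inferred from the test id, not shown in the hunk):

import pytest

DEFAULT_PROVIDER_COMBINATIONS = [
    pytest.param(
        {
            "eval": "meta_reference",
            "scoring": "basic",
            "datasetio": "localfs",
            "inference": "fireworks",
            "agents": "meta_reference",
            "safety": "llama_guard",
            "memory": "faiss",
        },
        id="meta_reference_eval_fireworks_inference",
        marks=pytest.mark.meta_reference_eval_fireworks_inference,
    ),
]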


@@ -40,14 +40,30 @@ async def eval_stack(request):
     providers = {}
     provider_data = {}
 
-    for key in ["datasetio", "eval", "scoring", "inference"]:
+    for key in [
+        "datasetio",
+        "eval",
+        "scoring",
+        "inference",
+        "agents",
+        "safety",
+        "memory",
+    ]:
         fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
         providers[key] = fixture.providers
         if fixture.provider_data:
             provider_data.update(fixture.provider_data)
 
     test_stack = await construct_stack_for_test(
-        [Api.eval, Api.datasetio, Api.inference, Api.scoring],
+        [
+            Api.eval,
+            Api.datasetio,
+            Api.inference,
+            Api.scoring,
+            Api.agents,
+            Api.safety,
+            Api.memory,
+        ],
        providers,
        provider_data,
    )
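Reviewer note: the key list and the Api list have to stay in sync, because each key is resolved to a session fixture named f"{key}_{fixture_dict[key]}". A quick illustration of the fixture names eval_stack now requests (the values are hypothetical examples of what request.param could contain):

fixture_dict = {"agents": "meta_reference", "safety": "llama_guard", "memory": "faiss"}
for key in ["agents", "safety", "memory"]:
    # eval_stack asks pytest for a fixture with exactly this name,
    # e.g. "agents_meta_reference", "safety_llama_guard", "memory_faiss"
    print(f"{key}_{fixture_dict[key]}")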


@@ -17,9 +17,11 @@ from llama_stack.providers.inline.inference.meta_reference import (
 )
 from llama_stack.providers.remote.inference.bedrock import BedrockConfig
+from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
+from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.tests.resolver import construct_stack_for_test

@@ -63,6 +65,21 @@ def inference_meta_reference(inference_model) -> ProviderFixture:
     )
 
 
+@pytest.fixture(scope="session")
+def inference_cerebras() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="cerebras",
+                provider_type="remote::cerebras",
+                config=CerebrasImplConfig(
+                    api_key=get_env_or_fail("CEREBRAS_API_KEY"),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 @pytest.fixture(scope="session")
 def inference_ollama(inference_model) -> ProviderFixture:
     inference_model = (

@@ -156,6 +173,22 @@ def inference_nvidia() -> ProviderFixture:
     )
 
 
+@pytest.fixture(scope="session")
+def inference_tgi() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="tgi",
+                provider_type="remote::tgi",
+                config=TGIImplConfig(
+                    url=get_env_or_fail("TGI_URL"),
+                    api_token=os.getenv("TGI_API_TOKEN", None),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 def get_model_short_name(model_name: str) -> str:
     """Convert model name to a short test identifier.

@@ -189,7 +222,9 @@ INFERENCE_FIXTURES = [
     "vllm_remote",
     "remote",
     "bedrock",
+    "cerebras",
     "nvidia",
+    "tgi",
 ]
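Reviewer note: both new fixtures read their endpoints and credentials at session setup — CEREBRAS_API_KEY and TGI_URL via get_env_or_fail, TGI_API_TOKEN optionally via os.getenv. get_env_or_fail is not part of this diff; a plausible sketch of its behavior (an assumption, not the actual helper in ..env):

import os

def get_env_or_fail(key: str) -> str:
    # Assumed behavior: return the variable's value or fail fixture setup loudly.
    value = os.getenv(key)
    if not value:
        raise ValueError(f"Environment variable {key} must be set to run this fixture")
    return value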


@@ -94,6 +94,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::cerebras",
         ):
             pytest.skip("Other inference providers don't support completion() yet")

@@ -140,6 +141,8 @@ class TestInference:
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
+            "remote::cerebras",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
@@ -213,7 +216,15 @@ class TestInference:
         response = await inference_impl.chat_completion(
             model_id=inference_model,
             messages=[
-                SystemMessage(content="You are a helpful assistant."),
+                # we include context about Michael Jordan in the prompt so that the test is
+                # focused on the functionality of the model and not on the information embedded
+                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
+                SystemMessage(
+                    content=(
+                        "You are a helpful assistant.\n\n"
+                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+                    )
+                ),
                 UserMessage(content="Please give me information about Michael Jordan."),
             ],
             stream=False,
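Reviewer note: baking Jordan's birth year and season count into the system prompt keeps the assertion about instruction following rather than model recall. The downstream check is not part of this hunk; a hypothetical sketch of the kind of schema validation such a test performs (field names are illustrative):

from pydantic import BaseModel

class AnswerFormat(BaseModel):  # hypothetical response schema, for illustration only
    first_name: str
    last_name: str
    year_of_birth: int
    num_seasons_in_nba: int

answer = AnswerFormat.model_validate_json(
    '{"first_name": "Michael", "last_name": "Jordan", '
    '"year_of_birth": 1963, "num_seasons_in_nba": 15}'
)
assert answer.year_of_birth == 1963
assert answer.num_seasons_in_nba == 15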


@@ -10,9 +10,10 @@ import pytest_asyncio
 from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Api, Provider
+from llama_stack.providers.inline.scoring.braintrust import BraintrustScoringConfig
 from llama_stack.providers.tests.resolver import construct_stack_for_test
 
 from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
 
 
 @pytest.fixture(scope="session")

@@ -40,7 +41,9 @@ def scoring_braintrust() -> ProviderFixture:
             Provider(
                 provider_id="braintrust",
                 provider_type="inline::braintrust",
-                config={},
+                config=BraintrustScoringConfig(
+                    openai_api_key=get_env_or_fail("OPENAI_API_KEY"),
+                ).model_dump(),
             )
         ],
     )
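Reviewer note: the braintrust fixture now builds a typed config instead of passing an empty dict, so a missing OPENAI_API_KEY fails at fixture setup rather than at scoring time, and model_dump() serializes the validated config into the plain dict the Provider entry expects. A minimal sketch of that pattern (a stand-in model, not the real BraintrustScoringConfig):

from pydantic import BaseModel

class ExampleScoringConfig(BaseModel):  # stand-in for BraintrustScoringConfig
    openai_api_key: str

# model_dump() turns the validated config into a plain dict for the provider registry.
config_dict = ExampleScoringConfig(openai_api_key="sk-example").model_dump()
assert config_dict == {"openai_api_key": "sk-example"}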