Add inline vLLM provider to regression tests

Fred Reiss 2024-12-19 11:27:59 -08:00
parent 5e08812bcb
commit b74a05114f
2 changed files with 38 additions and 10 deletions

@@ -15,6 +15,7 @@ from llama_stack.distribution.datatypes import Api, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.inference.vllm import VLLMConfig
 from llama_stack.providers.remote.inference.bedrock import BedrockConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
@@ -104,6 +105,26 @@ def inference_ollama(inference_model) -> ProviderFixture:
     )


+@pytest_asyncio.fixture(scope="session")
+def inference_vllm(inference_model) -> ProviderFixture:
+    inference_model = (
+        [inference_model] if isinstance(inference_model, str) else inference_model
+    )
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id=f"vllm-{i}",
+                provider_type="inline::vllm",
+                config=VLLMConfig(
+                    model=m,
+                    enforce_eager=True,  # Make test run faster
+                ).model_dump(),
+            )
+            for i, m in enumerate(inference_model)
+        ]
+    )
+
+
 @pytest.fixture(scope="session")
 def inference_vllm_remote() -> ProviderFixture:
     return ProviderFixture(
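The new inference_vllm fixture above builds one inline vLLM Provider per requested model and serializes each VLLMConfig to a plain dict with Pydantic's model_dump() before handing it to Provider. Below is a minimal sketch of that serialization step; _FakeVLLMConfig is a stand-in model for illustration only, since the real VLLMConfig's full field list is not shown in this diff.

# Stand-in for VLLMConfig; field names beyond those used in the fixture are assumptions.
from pydantic import BaseModel

class _FakeVLLMConfig(BaseModel):
    model: str
    enforce_eager: bool = False

cfg = _FakeVLLMConfig(model="Llama3.2-3B-Instruct", enforce_eager=True)
# Provider.config expects a plain dict, hence the .model_dump() call in the fixture.
print(cfg.model_dump())  # {'model': 'Llama3.2-3B-Instruct', 'enforce_eager': True}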
@@ -222,6 +243,7 @@ INFERENCE_FIXTURES = [
     "ollama",
     "fireworks",
     "together",
+    "vllm",
     "vllm_remote",
     "remote",
     "bedrock",
@@ -254,4 +276,8 @@ async def inference_stack(request, inference_model):
         ],
     )
-    return test_stack.impls[Api.inference], test_stack.impls[Api.models]
+    # Pytest yield fixture; see https://docs.pytest.org/en/stable/how-to/fixtures.html#yield-fixtures-recommended
+    yield test_stack.impls[Api.inference], test_stack.impls[Api.models]
+
+    # Cleanup code that runs after test case completion
+    await test_stack.impls[Api.inference].shutdown()
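The final hunk turns inference_stack from a plain return into a yield fixture so the inference provider can be shut down once the session's tests finish. A minimal, self-contained illustration of that teardown pattern follows; FakeInferenceImpl is a stand-in for the real provider implementation, not part of this commit.

import pytest_asyncio

class FakeInferenceImpl:
    async def shutdown(self) -> None:
        # The real provider would release GPU memory / worker processes here.
        print("shutting down inference provider")

@pytest_asyncio.fixture(scope="session")
async def inference_impl():
    impl = FakeInferenceImpl()
    yield impl             # test code runs while the fixture is "live"
    await impl.shutdown()  # teardown runs once, after the last test in the session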