Mirror of https://github.com/meta-llama/llama-stack.git
Merge branch 'vllm' into vllm-merge-1
commit 8e358ec6a8
3 changed files with 64 additions and 17 deletions
@@ -15,6 +15,7 @@ from llama_stack.distribution.datatypes import Api, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.inference.vllm import VLLMConfig
 from llama_stack.providers.remote.inference.bedrock import BedrockConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
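
For context: the only things this diff reveals about VLLMConfig are the two fields used further down (model, enforce_eager) and that it supports .model_dump(), i.e. it behaves like a pydantic model. A hedged sketch of such a config class, reconstructed purely from that usage; the real class in llama_stack.providers.inline.inference.vllm may define more fields and different defaults:

    from pydantic import BaseModel

    # Sketch only, inferred from how the config is used in this diff;
    # not the actual VLLMConfig class.
    class VLLMConfigSketch(BaseModel):
        model: str                   # model loaded by the inline vLLM engine
        enforce_eager: bool = False  # default value here is an assumption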
@@ -104,6 +105,26 @@ def inference_ollama(inference_model) -> ProviderFixture:
     )
 
 
+@pytest_asyncio.fixture(scope="session")
+def inference_vllm(inference_model) -> ProviderFixture:
+    inference_model = (
+        [inference_model] if isinstance(inference_model, str) else inference_model
+    )
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id=f"vllm-{i}",
+                provider_type="inline::vllm",
+                config=VLLMConfig(
+                    model=m,
+                    enforce_eager=True,  # Make test run faster
+                ).model_dump(),
+            )
+            for i, m in enumerate(inference_model)
+        ]
+    )
+
+
 @pytest.fixture(scope="session")
 def inference_vllm_remote() -> ProviderFixture:
     return ProviderFixture(
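
For context, the new inference_vllm fixture accepts either a single model id or a list of ids and builds one inline vllm provider per model, each with its own provider_id. A standalone sketch of what that comprehension produces for two models; the model names below are illustrative, everything else comes from the diff:

    from llama_stack.distribution.datatypes import Provider
    from llama_stack.providers.inline.inference.vllm import VLLMConfig

    models = ["Llama-3.1-8B-Instruct", "Llama-3.2-3B-Instruct"]  # illustrative ids
    providers = [
        Provider(
            provider_id=f"vllm-{i}",
            provider_type="inline::vllm",
            config=VLLMConfig(model=m, enforce_eager=True).model_dump(),
        )
        for i, m in enumerate(models)
    ]
    print([p.provider_id for p in providers])  # ['vllm-0', 'vllm-1']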
@@ -236,6 +257,7 @@ INFERENCE_FIXTURES = [
     "ollama",
     "fireworks",
     "together",
+    "vllm",
     "vllm_remote",
     "remote",
     "bedrock",
@@ -268,4 +290,8 @@ async def inference_stack(request, inference_model):
         ],
     )
 
-    return test_stack.impls[Api.inference], test_stack.impls[Api.models]
+    # Pytest yield fixture; see https://docs.pytest.org/en/stable/how-to/fixtures.html#yield-fixtures-recommended
+    yield test_stack.impls[Api.inference], test_stack.impls[Api.models]
+
+    # Cleanup code that runs after test case completion
+    await test_stack.impls[Api.inference].shutdown()
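
For context, switching from return to yield makes inference_stack a teardown-capable pytest fixture: the lines after the yield run when the session-scoped fixture is finalized, so the inference provider is shut down after the tests that used it. A hedged sketch of a test consuming the fixture; the test name and assertions are illustrative, and only what the diff shows the fixture yielding is relied on:

    import pytest

    @pytest.mark.asyncio
    async def test_uses_inference_stack(inference_stack):
        # The fixture yields (inference impl, models impl); shutdown() now
        # runs automatically when the fixture is torn down at session end.
        inference_impl, models_impl = inference_stack
        assert inference_impl is not None
        assert models_impl is not None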