Enable remote::vllm (#384)

* Enable remote::vllm

* Kill the giant list of hard coded models
Author: Ashwin Bharambe, 2024-11-06 14:42:44 -08:00 (committed by GitHub)
Parent: 093c9f1987
Commit: b10e9f46bb
5 changed files with 80 additions and 53 deletions
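
The new fixture in the diff below is gated on a single environment variable. As a rough, standalone sketch of that pattern (the `VLLMInferenceAdapterConfig` fields and the `get_env_or_fail` behavior are re-declared here as assumptions for illustration; the real definitions live in llama_stack, not in this snippet):

```python
# Hedged sketch: a pydantic config holding the vLLM server URL, populated
# from the VLLM_URL environment variable. All names below mirror the diff,
# but their bodies are assumptions made for this example.
import os

from pydantic import BaseModel


class VLLMInferenceAdapterConfig(BaseModel):
    # Base URL of an already-running vLLM server (assumption: `url` is the
    # only field needed for this sketch).
    url: str


def get_env_or_fail(name: str) -> str:
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Set {name} to use the remote::vllm fixture")
    return value


if __name__ == "__main__":
    config = VLLMInferenceAdapterConfig(url=get_env_or_fail("VLLM_URL"))
    print(config.model_dump())  # e.g. {'url': 'http://localhost:8000/v1'}
```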

@@ -14,6 +14,7 @@ from llama_stack.distribution.datatypes import Api, Provider
from llama_stack.providers.adapters.inference.fireworks import FireworksImplConfig
from llama_stack.providers.adapters.inference.ollama import OllamaImplConfig
from llama_stack.providers.adapters.inference.together import TogetherImplConfig
from llama_stack.providers.adapters.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.impls.meta_reference.inference import (
    MetaReferenceInferenceConfig,
)
@@ -78,6 +79,21 @@ def inference_ollama(inference_model) -> ProviderFixture:
    )


@pytest.fixture(scope="session")
def inference_vllm_remote() -> ProviderFixture:
    return ProviderFixture(
        providers=[
            Provider(
                provider_id="remote::vllm",
                provider_type="remote::vllm",
                config=VLLMInferenceAdapterConfig(
                    url=get_env_or_fail("VLLM_URL"),
                ).model_dump(),
            )
        ],
    )


@pytest.fixture(scope="session")
def inference_fireworks() -> ProviderFixture:
    return ProviderFixture(
@@ -109,7 +125,14 @@ def inference_together() -> ProviderFixture:
    )


INFERENCE_FIXTURES = ["meta_reference", "ollama", "fireworks", "together", "remote"]
INFERENCE_FIXTURES = [
"meta_reference",
"ollama",
"fireworks",
"together",
"vllm_remote",
"remote",
]
@pytest_asyncio.fixture(scope="session")
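
The excerpt ends at the decorator above; its body is not shown in this diff. For orientation only, here is a hedged sketch of how a fixture list like `INFERENCE_FIXTURES` is commonly consumed via pytest parametrization. The `inference_provider` name, the `params` wiring, and the `inference_` prefix convention are illustrative assumptions, not code from this commit.

```python
import pytest

# Mirrors the list added in the diff above; repeated here only so the
# sketch is self-contained.
INFERENCE_FIXTURES = [
    "meta_reference",
    "ollama",
    "fireworks",
    "together",
    "vllm_remote",
    "remote",
]


@pytest.fixture(scope="session", params=INFERENCE_FIXTURES)
def inference_provider(request):
    # Resolve e.g. "inference_vllm_remote" lazily, so only the selected
    # provider's environment (such as VLLM_URL) has to be configured.
    return request.getfixturevalue(f"inference_{request.param}")
```

With wiring along these lines, adding "vllm_remote" to the list is enough to run the inference test suite against a vLLM server reachable at VLLM_URL, instead of maintaining a hard-coded model list per provider.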