Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-30 19:53:44 +00:00)
Enable remote::vllm (#384)
* Enable remote::vllm
* Kill the giant list of hard coded models
parent 093c9f1987
commit b10e9f46bb
5 changed files with 80 additions and 53 deletions
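The heart of the change is a new pytest fixture that wires the remote::vllm inference adapter to a vLLM server whose address comes from the VLLM_URL environment variable (see the second hunk below). A minimal standalone sketch of that configuration step, assuming vLLM is serving its OpenAI-compatible API locally; the example address http://localhost:8000/v1 and the use of os.environ here are illustrative, not part of the commit:

import os

from llama_stack.providers.adapters.inference.vllm import VLLMInferenceAdapterConfig

# Illustrative only: point the adapter at a locally running vLLM server.
# The exact URL shape the adapter expects (e.g. whether "/v1" is included)
# is an assumption; the commit only shows that the fixture reads VLLM_URL.
os.environ.setdefault("VLLM_URL", "http://localhost:8000/v1")

config = VLLMInferenceAdapterConfig(url=os.environ["VLLM_URL"])
print(config.model_dump())  # the same serialized dict the fixture passes to Provider(config=...)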
@@ -14,6 +14,7 @@ from llama_stack.distribution.datatypes import Api, Provider
 from llama_stack.providers.adapters.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.adapters.inference.ollama import OllamaImplConfig
 from llama_stack.providers.adapters.inference.together import TogetherImplConfig
+from llama_stack.providers.adapters.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.impls.meta_reference.inference import (
     MetaReferenceInferenceConfig,
 )
@@ -78,6 +79,21 @@ def inference_ollama(inference_model) -> ProviderFixture:
     )


+@pytest.fixture(scope="session")
+def inference_vllm_remote() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="remote::vllm",
+                provider_type="remote::vllm",
+                config=VLLMInferenceAdapterConfig(
+                    url=get_env_or_fail("VLLM_URL"),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 @pytest.fixture(scope="session")
 def inference_fireworks() -> ProviderFixture:
     return ProviderFixture(
@@ -109,7 +125,14 @@ def inference_together() -> ProviderFixture:
     )


-INFERENCE_FIXTURES = ["meta_reference", "ollama", "fireworks", "together", "remote"]
+INFERENCE_FIXTURES = [
+    "meta_reference",
+    "ollama",
+    "fireworks",
+    "together",
+    "vllm_remote",
+    "remote",
+]


 @pytest_asyncio.fixture(scope="session")
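With "vllm_remote" registered in INFERENCE_FIXTURES, the session-scoped fixture can be requested by name in a test. A minimal sanity-check sketch, not part of the commit; it assumes VLLM_URL is exported, since the fixture fails fast via get_env_or_fail otherwise:

# Hypothetical test, shown only to illustrate the shape of the fixture above.
def test_vllm_remote_fixture_shape(inference_vllm_remote):
    providers = inference_vllm_remote.providers
    assert len(providers) == 1

    provider = providers[0]
    assert provider.provider_id == "remote::vllm"
    assert provider.provider_type == "remote::vllm"
    assert "url" in provider.config  # populated from VLLM_URL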