This commit is contained in:
Yuan Tang 2024-10-03 22:05:07 -04:00
parent 765f2c86af
commit ad4e65e876
No known key found for this signature in database
4 changed files with 9 additions and 10 deletions

View file

@@ -4,17 +4,15 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .config import DatabricksImplConfig from .config import VLLMImplConfig
from .vllm import InferenceEndpointAdapter, VLLMAdapter from .vllm import VLLMInferenceAdapter
async def get_adapter_impl(config: DatabricksImplConfig, _deps): async def get_adapter_impl(config: VLLMImplConfig, _deps):
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}" assert isinstance(config, VLLMImplConfig), f"Unexpected config type: {type(config)}"
if config.url is not None: if config.url is not None:
impl = VLLMAdapter(config) impl = VLLMInferenceAdapter(config)
elif config.is_inference_endpoint():
impl = InferenceEndpointAdapter(config)
else: else:
raise ValueError( raise ValueError(
"Invalid configuration. Specify either an URL or HF Inference Endpoint details (namespace and endpoint name)." "Invalid configuration. Specify either an URL or HF Inference Endpoint details (namespace and endpoint name)."

View file

@@ -22,8 +22,8 @@ from .config import VLLMImplConfig
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html # Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
VLLM_SUPPORTED_MODELS = { VLLM_SUPPORTED_MODELS = {
"Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct", "Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct", # "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct", # "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
} }

View file

@@ -66,6 +66,7 @@ def available_providers() -> List[ProviderSpec]:
adapter_type="vllm", adapter_type="vllm",
pip_packages=["openai"], pip_packages=["openai"],
module="llama_stack.providers.adapters.inference.vllm", module="llama_stack.providers.adapters.inference.vllm",
config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig",
), ),
), ),
remote_provider_spec( remote_provider_spec(