This commit is contained in:
Yuan Tang 2024-10-03 22:05:07 -04:00
parent 765f2c86af
commit ad4e65e876
No known key found for this signature in database
4 changed files with 9 additions and 10 deletions

View file

@@ -4,17 +4,15 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .config import DatabricksImplConfig from .config import VLLMImplConfig
from .vllm import InferenceEndpointAdapter, VLLMAdapter from .vllm import VLLMInferenceAdapter
async def get_adapter_impl(config: DatabricksImplConfig, _deps): async def get_adapter_impl(config: VLLMImplConfig, _deps):
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}" assert isinstance(config, VLLMImplConfig), f"Unexpected config type: {type(config)}"
if config.url is not None: if config.url is not None:
impl = VLLMAdapter(config) impl = VLLMInferenceAdapter(config)
elif config.is_inference_endpoint():
impl = InferenceEndpointAdapter(config)
else: else:
raise ValueError( raise ValueError(
"Invalid configuration. Specify either an URL or HF Inference Endpoint details (namespace and endpoint name)." "Invalid configuration. Specify either an URL or HF Inference Endpoint details (namespace and endpoint name)."

View file

@@ -22,8 +22,8 @@ from .config import VLLMImplConfig
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html # Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
VLLM_SUPPORTED_MODELS = { VLLM_SUPPORTED_MODELS = {
"Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct", "Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct", # "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct", # "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
} }

View file

@@ -66,6 +66,7 @@ def available_providers() -> List[ProviderSpec]:
adapter_type="vllm", adapter_type="vllm",
pip_packages=["openai"], pip_packages=["openai"],
module="llama_stack.providers.adapters.inference.vllm", module="llama_stack.providers.adapters.inference.vllm",
config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig",
), ),
), ),
remote_provider_spec( remote_provider_spec(