This commit is contained in:
Yuan Tang 2024-10-03 22:05:07 -04:00
parent 765f2c86af
commit ad4e65e876
No known key found for this signature in database
4 changed files with 9 additions and 10 deletions

View file

@@ -7,4 +7,4 @@ distribution_spec:
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda
image_type: conda

View file

@@ -4,17 +4,15 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .config import DatabricksImplConfig
from .vllm import InferenceEndpointAdapter, VLLMAdapter
from .config import VLLMImplConfig
from .vllm import VLLMInferenceAdapter
async def get_adapter_impl(config: DatabricksImplConfig, _deps):
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
async def get_adapter_impl(config: VLLMImplConfig, _deps):
assert isinstance(config, VLLMImplConfig), f"Unexpected config type: {type(config)}"
if config.url is not None:
impl = VLLMAdapter(config)
elif config.is_inference_endpoint():
impl = InferenceEndpointAdapter(config)
impl = VLLMInferenceAdapter(config)
else:
raise ValueError(
"Invalid configuration. Specify either an URL or HF Inference Endpoint details (namespace and endpoint name)."

View file

@@ -22,8 +22,8 @@ from .config import VLLMImplConfig
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
VLLM_SUPPORTED_MODELS = {
"Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
# "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
# "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
}

View file

@@ -66,6 +66,7 @@ def available_providers() -> List[ProviderSpec]:
adapter_type="vllm",
pip_packages=["openai"],
module="llama_stack.providers.adapters.inference.vllm",
config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig",
),
),
remote_provider_spec(