mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00

commit 111e32ffe5 (parent 64c5d38ae9): refactor according to repo updates

4 changed files with 24 additions and 18 deletions
@@ -7,6 +7,7 @@
 from .config import NutanixImplConfig
 from .nutanix import NutanixInferenceAdapter
 
+
 async def get_adapter_impl(config: NutanixInferenceAdapter, _deps):
     assert isinstance(
         config, NutanixImplConfig
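For reference, a minimal, hedged sketch of the entry-point pattern this file follows. The types below are stand-ins (the real `NutanixImplConfig` and `NutanixInferenceAdapter` come from this package), and the `initialize()` step is an assumption based on the usual llama-stack provider pattern, not taken from this diff:

# Hedged sketch only: stand-in types, not the real package imports.
class NutanixImplConfig:
    pass


class NutanixInferenceAdapter:
    def __init__(self, config: NutanixImplConfig) -> None:
        self.config = config

    async def initialize(self) -> None:
        pass  # e.g., validate the endpoint URL and credentials


async def get_adapter_impl(config: NutanixImplConfig, _deps):
    # Fail fast if the registry wired the wrong config type to this provider.
    assert isinstance(config, NutanixImplConfig), f"Unexpected config: {type(config)}"
    impl = NutanixInferenceAdapter(config)
    await impl.initialize()
    return impl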
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Optional
-
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field
 
@@ -14,7 +14,10 @@ from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 
 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+    ModelRegistryHelper,
+)
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
     process_chat_completion_response,
@@ -26,16 +29,18 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 from .config import NutanixImplConfig
 
-NUTANIX_SUPPORTED_MODELS = {
-    "Llama3.1-8B-Instruct": "vllm-llama-3-1",
-}
+
+model_aliases = [
+    build_model_alias(
+        "vllm-llama-3-1",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+]
 
 
 class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: NutanixImplConfig) -> None:
-        ModelRegistryHelper.__init__(
-            self, stack_to_provider_models_map=NUTANIX_SUPPORTED_MODELS
-        )
+        ModelRegistryHelper.__init__(self, model_aliases)
         self.config = config
         self.formatter = ChatFormat(Tokenizer.get_instance())
 
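To make the refactor concrete, here is a hedged, self-contained sketch of what alias-based registration gives over the old name-to-name dict. Only the `build_model_alias(...)` call shape is taken from the diff; the `ModelAlias` dataclass, the lookup table, and the literal descriptor string are assumptions for illustration:

# Hedged sketch: stand-ins, not the real llama-stack implementations.
from dataclasses import dataclass


@dataclass
class ModelAlias:
    provider_model_id: str  # name the endpoint serves, e.g. "vllm-llama-3-1"
    llama_model: str        # core descriptor, e.g. "Llama3.1-8B-Instruct"


def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
    return ModelAlias(provider_model_id, model_descriptor)


class ModelRegistryHelper:
    def __init__(self, model_aliases: list[ModelAlias]) -> None:
        # Index provider ids so the adapter can recover the core Llama
        # model (needed for prompt formatting) from a provider model name.
        self.provider_id_to_llama_model = {
            a.provider_model_id: a.llama_model for a in model_aliases
        }

    def get_llama_model(self, provider_model_id: str) -> str:
        return self.provider_id_to_llama_model[provider_model_id]


aliases = [build_model_alias("vllm-llama-3-1", "Llama3.1-8B-Instruct")]
helper = ModelRegistryHelper(aliases)
assert helper.get_llama_model("vllm-llama-3-1") == "Llama3.1-8B-Instruct"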
@@ -47,7 +52,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
 
     async def completion(
         self,
-        model: str,
+        model_id: str,
         content: InterleavedTextMedia,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -58,7 +63,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
 
     async def chat_completion(
         self,
-        model: str,
+        model_id: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -110,8 +115,10 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
 
     def _get_params(self, request: ChatCompletionRequest) -> dict:
         params = {
-            "model": self.map_to_provider_model(request.model),
-            "messages": chat_completion_request_to_messages(request, return_dict=True),
+            "model": request.model,
+            "messages": chat_completion_request_to_messages(
+                request, self.get_llama_model(request.model), return_dict=True
+            ),
             "stream": request.stream,
             **get_sampling_options(request.sampling_params),
         }
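A short hedged sketch of the behavioral change in `_get_params`: the request's model id now passes through to the provider unchanged, while prompt construction is keyed off the resolved core model. The helper and dict below are stand-ins consistent with the sketch above, not the real adapter:

PROVIDER_ID_TO_LLAMA_MODEL = {"vllm-llama-3-1": "Llama3.1-8B-Instruct"}


def get_llama_model(provider_model_id: str) -> str:
    return PROVIDER_ID_TO_LLAMA_MODEL[provider_model_id]


def get_params(model: str, stream: bool = False) -> dict:
    # Before this commit the adapter remapped the stack-facing name to the
    # provider name here; now `model` is already the provider id, and only
    # the prompt formatter needs the core Llama model from the registry.
    core_model = get_llama_model(model)
    return {"model": model, "core_model_for_prompting": core_model, "stream": stream}


params = get_params("vllm-llama-3-1")
assert params["model"] == "vllm-llama-3-1"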
@@ -119,7 +126,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
 
     async def embeddings(
         self,
-        model: str,
+        model_id: str,
         contents: List[InterleavedTextMedia],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
@@ -3,7 +3,7 @@ distribution_spec:
   description: Use Nutanix AI Endpoint for running LLM inference
   providers:
     inference: remote::nutanix
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
+    agents: inline::meta-reference
+    telemetry: inline::meta-reference