refactor according to repo updates

Jinan Zhou 2024-11-20 07:49:32 +00:00
parent 64c5d38ae9
commit 111e32ffe5
4 changed files with 24 additions and 18 deletions

__init__.py

@@ -7,6 +7,7 @@
 from .config import NutanixImplConfig
 from .nutanix import NutanixInferenceAdapter

 async def get_adapter_impl(config: NutanixInferenceAdapter, _deps):
     assert isinstance(
         config, NutanixImplConfig
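
Note that the annotation on `config` in the source reads `NutanixInferenceAdapter`, although the assert implies `NutanixImplConfig` was intended. For context, a minimal sketch of the full entry point under the standard llama-stack provider pattern; the adapter construction and `initialize()` call are assumptions, not part of the visible hunk:

from .config import NutanixImplConfig
from .nutanix import NutanixInferenceAdapter


async def get_adapter_impl(config: NutanixImplConfig, _deps):
    # The stack passes the provider's config object; fail fast on a mismatch.
    assert isinstance(
        config, NutanixImplConfig
    ), f"Unexpected config type: {type(config)}"

    # Assumed continuation (not visible in the hunk): build the adapter,
    # let it initialize, and hand it back to the stack.
    impl = NutanixInferenceAdapter(config)
    await impl.initialize()
    return impl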

config.py

@ -4,8 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from typing import Optional
from llama_models.schema_utils import json_schema_type from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field from pydantic import BaseModel, Field

nutanix.py

@@ -14,7 +14,10 @@ from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+    ModelRegistryHelper,
+)
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
     process_chat_completion_response,
@@ -26,16 +29,18 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import NutanixImplConfig

-NUTANIX_SUPPORTED_MODELS = {
-    "Llama3.1-8B-Instruct": "vllm-llama-3-1",
-}
+model_aliases = [
+    build_model_alias(
+        "vllm-llama-3-1",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+]

 class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: NutanixImplConfig) -> None:
-        ModelRegistryHelper.__init__(
-            self, stack_to_provider_models_map=NUTANIX_SUPPORTED_MODELS
-        )
+        ModelRegistryHelper.__init__(self, model_aliases)
         self.config = config
         self.formatter = ChatFormat(Tokenizer.get_instance())
@@ -47,7 +52,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     async def completion(
         self,
-        model: str,
+        model_id: str,
         content: InterleavedTextMedia,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -58,7 +63,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     async def chat_completion(
         self,
-        model: str,
+        model_id: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
@@ -110,8 +115,10 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     def _get_params(self, request: ChatCompletionRequest) -> dict:
         params = {
-            "model": self.map_to_provider_model(request.model),
-            "messages": chat_completion_request_to_messages(request, return_dict=True),
+            "model": request.model,
+            "messages": chat_completion_request_to_messages(
+                request, self.get_llama_model(request.model), return_dict=True
+            ),
             "stream": request.stream,
             **get_sampling_options(request.sampling_params),
         }
@@ -119,7 +126,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     async def embeddings(
         self,
-        model: str,
+        model_id: str,
         contents: List[InterleavedTextMedia],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
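
This is the core of the refactor: the hand-rolled NUTANIX_SUPPORTED_MODELS mapping (stack name to provider name) gives way to the repo's newer model-alias registry. Aliases are declared once with build_model_alias, resolution moves into ModelRegistryHelper, so request.model already carries the provider-side id ("vllm-llama-3-1") and get_llama_model() recovers the canonical Llama descriptor the prompt formatter needs. A self-contained sketch of the idea; the field and method shapes here are illustrative approximations of the helper, not copied from it:

from dataclasses import dataclass
from typing import Dict, List


@dataclass
class ModelAlias:
    provider_model_id: str  # name the Nutanix endpoint serves, e.g. "vllm-llama-3-1"
    llama_model: str        # canonical descriptor, e.g. "Llama3.1-8B-Instruct"


def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
    # Pair a provider-side model name with its canonical Llama identity.
    return ModelAlias(provider_model_id, model_descriptor)


class ModelRegistryHelper:
    def __init__(self, model_aliases: List[ModelAlias]) -> None:
        self._alias_to_llama: Dict[str, str] = {
            alias.provider_model_id: alias.llama_model for alias in model_aliases
        }

    def get_llama_model(self, provider_model_id: str) -> str:
        # _get_params uses this to pick the prompt format that matches
        # the underlying Llama model.
        return self._alias_to_llama[provider_model_id]

The model to model_id renames in completion, chat_completion, and embeddings simply track the same upstream change to the Inference API signatures.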

build.yaml

@@ -3,7 +3,7 @@ distribution_spec:
   description: Use Nutanix AI Endpoint for running LLM inference
   providers:
     inference: remote::nutanix
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
+    agents: inline::meta-reference
+    telemetry: inline::meta-reference
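
The build-spec edit tracks the repo-wide move to namespaced provider IDs: remote adapters are addressed as remote::<name> and in-process providers as inline::<name>, with the former catch-all meta-reference entries for memory and safety now pointing at the concrete inline::faiss and inline::llama-guard implementations.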