commit efd842d605
parent d7b159663c
Author: Edward Ma
Date:   2024-12-02 08:17:22 -08:00

@@ -11,7 +11,7 @@ from llama_models.datatypes import CoreModelId, SamplingStrategy
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message, ImageMedia
+from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from openai import OpenAI
@@ -27,9 +27,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
 )
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    convert_message_to_dict,
-)
+from llama_stack.providers.utils.inference.prompt_adapter import convert_message_to_dict

 from .config import SambaNovaImplConfig
@@ -93,7 +91,7 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         raise NotImplementedError()

     async def chat_completion(
         self,
         model_id: str,
@@ -125,7 +123,7 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
             return self._stream_chat_completion(request_sambanova, client)
         else:
             return await self._nonstream_chat_completion(request_sambanova, client)

     async def _nonstream_chat_completion(
         self, request: ChatCompletionRequest, client: OpenAI
     ) -> ChatCompletionResponse:
@@ -145,18 +143,22 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
             stream, self.formatter
         ):
             yield chunk

     async def embeddings(
         self,
         model_id: str,
         contents: List[InterleavedTextMedia],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()

-    async def convert_chat_completion_request(self, request: ChatCompletionRequest) -> dict:
+    async def convert_chat_completion_request(
+        self, request: ChatCompletionRequest
+    ) -> dict:
         compatible_request = self.convert_sampling_params(request.sampling_params)
         compatible_request["model"] = request.model
-        compatible_request["messages"] = await self.convert_to_sambanova_message(request.messages)
+        compatible_request["messages"] = await self.convert_to_sambanova_message(
+            request.messages
+        )
         compatible_request["stream"] = request.stream
         compatible_request["logprobs"] = False
         compatible_request["extra_headers"] = {
@@ -164,7 +166,9 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
         }
         return compatible_request

-    def convert_sampling_params(self, sampling_params: SamplingParams, legacy: bool = False) -> dict:
+    def convert_sampling_params(
+        self, sampling_params: SamplingParams, legacy: bool = False
+    ) -> dict:
         params = {}
         if sampling_params:
@@ -182,14 +186,14 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
                 params["extra_body"]["top_k"] = sampling_params.top_k
             elif sampling_params.strategy == "greedy":
                 params["temperature"] = sampling_params.temperature

         return params

     async def convert_to_sambanova_message(self, messages: List[Message]) -> List[dict]:
         conversation = []
         for message in messages:
             content = await convert_message_to_dict(message)

             # Need to override role
             if isinstance(message, UserMessage):
                 content["role"] = "user"
@@ -197,14 +201,16 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
                 content["role"] = "assistant"
                 tools = []
                 for tool_call in message.tool_calls:
-                    tools.append({
-                        "id": tool_call.call_id,
-                        "function": {
-                            "name": tool_call.name,
-                            "arguments": json.dumps(tool_call.arguments),
-                        },
-                        "type": "function",
-                    })
+                    tools.append(
+                        {
+                            "id": tool_call.call_id,
+                            "function": {
+                                "name": tool_call.name,
+                                "arguments": json.dumps(tool_call.arguments),
+                            },
+                            "type": "function",
+                        }
+                    )
                 content["tool_calls"] = tools
             elif isinstance(message, ToolResponseMessage):
                 content["role"] = "tool"
@@ -215,5 +221,3 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
             conversation.append(content)

         return conversation
-
-
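
For reference, the tools.append(...) hunk above is a pure reflow; the tool-call mapping itself is unchanged. Below is a minimal, self-contained sketch of that mapping, using plain dataclasses as stand-ins for the llama_models ToolCall and CompletionMessage types (the stand-in names and fields are assumptions for illustration, not the adapter's actual imports):

import json
from dataclasses import dataclass, field
from typing import List


@dataclass
class ToolCall:  # illustrative stand-in, not llama_models' ToolCall
    call_id: str
    name: str
    arguments: dict


@dataclass
class CompletionMessage:  # illustrative stand-in, not llama_models' type
    content: str
    tool_calls: List[ToolCall] = field(default_factory=list)


def to_sambanova_assistant_dict(message: CompletionMessage) -> dict:
    # Same shape the adapter builds in convert_to_sambanova_message: each
    # tool call becomes an OpenAI-compatible {"id", "function", "type"}
    # entry, with the arguments serialized to a JSON string.
    tools = []
    for tool_call in message.tool_calls:
        tools.append(
            {
                "id": tool_call.call_id,
                "function": {
                    "name": tool_call.name,
                    "arguments": json.dumps(tool_call.arguments),
                },
                "type": "function",
            }
        )
    return {"role": "assistant", "content": message.content, "tool_calls": tools}


if __name__ == "__main__":
    msg = CompletionMessage(
        content="",
        tool_calls=[ToolCall("call_0", "get_weather", {"city": "Paris"})],
    )
    print(to_sambanova_assistant_dict(msg))

Serializing the arguments with json.dumps matches the OpenAI chat-completions schema, which expects function arguments as a JSON-encoded string rather than a nested object.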