working fireworks and together

Dinesh Yeduguru 2024-11-12 13:07:35 -08:00
parent 25d8ab0e14
commit 8de4cee373
8 changed files with 205 additions and 86 deletions

llama_stack/providers/utils/inference/model_registry.py

@@ -4,32 +4,54 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Dict
-
-from llama_models.sku_list import resolve_model
+from collections import namedtuple
+from typing import List
 
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 
+ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
+
+
+class ModelLookup:
+    def __init__(
+        self,
+        model_aliases: List[ModelAlias],
+    ):
+        self.alias_to_provider_id_map = {}
+        self.provider_id_to_llama_model_map = {}
+        for alias_obj in model_aliases:
+            for alias in alias_obj.aliases:
+                self.alias_to_provider_id_map[alias] = alias_obj.provider_model_id
+            # also add a mapping from provider model id to itself for easy lookup
+            self.alias_to_provider_id_map[alias_obj.provider_model_id] = (
+                alias_obj.provider_model_id
+            )
+            self.provider_id_to_llama_model_map[alias_obj.provider_model_id] = (
+                alias_obj.llama_model
+            )
+
+    def get_provider_model_id(self, identifier: str) -> str:
+        if identifier in self.alias_to_provider_id_map:
+            return self.alias_to_provider_id_map[identifier]
+        else:
+            raise ValueError(f"Unknown model: `{identifier}`")
+
 
 class ModelRegistryHelper(ModelsProtocolPrivate):
-    def __init__(self, stack_to_provider_models_map: Dict[str, str]):
-        self.stack_to_provider_models_map = stack_to_provider_models_map
+    def __init__(self, model_aliases: List[ModelAlias]):
+        self.model_lookup = ModelLookup(model_aliases)
 
-    def map_to_provider_model(self, identifier: str) -> str:
-        model = resolve_model(identifier)
-        if not model:
-            raise ValueError(f"Unknown model: `{identifier}`")
-
-        if identifier not in self.stack_to_provider_models_map:
-            raise ValueError(
-                f"Model {identifier} not found in map {self.stack_to_provider_models_map}"
-            )
-
-        return self.stack_to_provider_models_map[identifier]
+    def get_llama_model(self, provider_model_id: str) -> str:
+        return self.model_lookup.provider_id_to_llama_model_map[provider_model_id]
 
-    async def register_model(self, model: Model) -> None:
-        if model.provider_resource_id not in self.stack_to_provider_models_map:
-            raise ValueError(
-                f"Unsupported model {model.provider_resource_id}. Supported models: {self.stack_to_provider_models_map.keys()}"
-            )
+    async def register_model(self, model: Model) -> Model:
+        provider_model_id = self.model_lookup.get_provider_model_id(
+            model.provider_resource_id
+        )
+        if not provider_model_id:
+            raise ValueError(f"Unknown model: `{model.provider_resource_id}`")
+        model.provider_resource_id = provider_model_id
+        return model
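A minimal sketch of how a provider adapter might use the new alias-based registry. The import path mirrors the file above; the provider model IDs and alias strings are illustrative assumptions, not values taken from this commit.

# Sketch (assumed IDs/aliases): wiring a provider adapter to the new helper.
from llama_stack.providers.utils.inference.model_registry import (
    ModelAlias,
    ModelRegistryHelper,
)

_MODEL_ALIASES = [
    ModelAlias(
        provider_model_id="fireworks/llama-v3p1-8b-instruct",  # assumed provider-side ID
        aliases=["Llama3.1-8B-Instruct"],                       # assumed user-facing aliases
        llama_model="Llama3.1-8B-Instruct",                     # canonical llama model descriptor
    ),
]

helper = ModelRegistryHelper(model_aliases=_MODEL_ALIASES)

# Any alias, or the provider model ID itself, resolves to the provider model ID.
provider_id = helper.model_lookup.get_provider_model_id("Llama3.1-8B-Instruct")

# The provider model ID maps back to the canonical llama model for prompt handling.
llama_model = helper.get_llama_model(provider_id)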

llama_stack/providers/utils/inference/prompt_adapter.py

@@ -147,17 +147,17 @@ def augment_content_with_response_format_prompt(response_format, content):
 def chat_completion_request_to_prompt(
-    request: ChatCompletionRequest, formatter: ChatFormat
+    request: ChatCompletionRequest, llama_model: str, formatter: ChatFormat
 ) -> str:
-    messages = chat_completion_request_to_messages(request)
+    messages = chat_completion_request_to_messages(request, llama_model)
     model_input = formatter.encode_dialog_prompt(messages)
     return formatter.tokenizer.decode(model_input.tokens)
 
 
 def chat_completion_request_to_model_input_info(
-    request: ChatCompletionRequest, formatter: ChatFormat
+    request: ChatCompletionRequest, llama_model: str, formatter: ChatFormat
 ) -> Tuple[str, int]:
-    messages = chat_completion_request_to_messages(request)
+    messages = chat_completion_request_to_messages(request, llama_model)
     model_input = formatter.encode_dialog_prompt(messages)
     return (
         formatter.tokenizer.decode(model_input.tokens),
@@ -167,14 +167,15 @@ def chat_completion_request_to_model_input_info(
 def chat_completion_request_to_messages(
     request: ChatCompletionRequest,
+    llama_model: str,
 ) -> List[Message]:
     """Reads chat completion request and augments the messages to handle tools.
     For eg. for llama_3_1, add system message with the appropriate tools or
     add user messsage for custom tools, etc.
     """
-    model = resolve_model(request.model)
+    model = resolve_model(llama_model)
     if model is None:
-        cprint(f"Could not resolve model {request.model}", color="red")
+        cprint(f"Could not resolve model {llama_model}", color="red")
         return request.messages
 
     if model.descriptor() not in supported_inference_models():
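A small usage sketch for the updated prompt helpers: callers now resolve the canonical llama model (for example via ModelRegistryHelper.get_llama_model) and pass it explicitly, rather than the helpers reading request.model. The request setup and import paths below are assumptions for illustration, not code from this commit.

# Sketch (assumed setup): passing the resolved llama model into the prompt helpers.
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer

from llama_stack.apis.inference import ChatCompletionRequest, UserMessage
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
)

formatter = ChatFormat(Tokenizer.get_instance())
request = ChatCompletionRequest(
    model="fireworks/llama-v3p1-8b-instruct",   # provider-facing identifier (assumed)
    messages=[UserMessage(content="Hello!")],
)

# The helper no longer resolves request.model; the caller supplies the llama model.
prompt = chat_completion_request_to_prompt(request, "Llama3.1-8B-Instruct", formatter)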