Inference to use provider resource id to register and validate (#428)

This PR changes how a model id gets translated to the final model name
that is passed through to the provider.
Major changes include:
1) Providers are responsible for registering an object and, as part of
the registration, returning the object with the correct provider-specific
model name set as its provider_resource_id.
2) To help with the common lookups across a model's different names, a
new ModelLookup class is created (a rough sketch of both changes follows).
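
A minimal sketch of the intended contract, assuming hypothetical names
(Model, ModelLookup's interface, and the Together alias table here are
illustrative, not the PR's exact code):

```python
# Hedged sketch, not the PR's actual implementation.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Model:
    identifier: str
    provider_resource_id: Optional[str] = None


class ModelLookup:
    """Hypothetical helper for resolving the different names a model goes by."""

    def __init__(self, aliases: Dict[str, str]):
        self._aliases = aliases

    def resolve(self, name: str) -> Optional[str]:
        # Return the provider-specific name for any known alias.
        return self._aliases.get(name)


class TogetherInferenceAdapter:
    # Hypothetical mapping from user-facing ids to the provider's model names.
    lookup = ModelLookup(
        {"Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"}
    )

    async def register_model(self, model: Model) -> Model:
        # The provider, not the caller, fills in provider_resource_id and
        # returns the registered object.
        resource_id = self.lookup.resolve(model.identifier)
        if resource_id is None:
            raise ValueError(f"Model {model.identifier} is not served by this provider")
        model.provider_resource_id = resource_id
        return model
```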



Tested all inference providers, including together, fireworks, vllm,
ollama, meta reference, and bedrock.
Dinesh Yeduguru 2024-11-12 20:02:00 -08:00 committed by GitHub
parent e51107e019
commit fdff24e77a
21 changed files with 460 additions and 290 deletions


@@ -147,17 +147,17 @@ def augment_content_with_response_format_prompt(response_format, content):
 def chat_completion_request_to_prompt(
-    request: ChatCompletionRequest, formatter: ChatFormat
+    request: ChatCompletionRequest, llama_model: str, formatter: ChatFormat
 ) -> str:
-    messages = chat_completion_request_to_messages(request)
+    messages = chat_completion_request_to_messages(request, llama_model)
     model_input = formatter.encode_dialog_prompt(messages)
     return formatter.tokenizer.decode(model_input.tokens)


 def chat_completion_request_to_model_input_info(
-    request: ChatCompletionRequest, formatter: ChatFormat
+    request: ChatCompletionRequest, llama_model: str, formatter: ChatFormat
 ) -> Tuple[str, int]:
-    messages = chat_completion_request_to_messages(request)
+    messages = chat_completion_request_to_messages(request, llama_model)
     model_input = formatter.encode_dialog_prompt(messages)
     return (
         formatter.tokenizer.decode(model_input.tokens),
@@ -167,14 +167,15 @@ def chat_completion_request_to_model_input_info(
 def chat_completion_request_to_messages(
     request: ChatCompletionRequest,
+    llama_model: str,
 ) -> List[Message]:
     """Reads chat completion request and augments the messages to handle tools.
     For eg. for llama_3_1, add system message with the appropriate tools or
     add user message for custom tools, etc.
     """
-    model = resolve_model(request.model)
+    model = resolve_model(llama_model)
     if model is None:
-        cprint(f"Could not resolve model {request.model}", color="red")
+        cprint(f"Could not resolve model {llama_model}", color="red")
         return request.messages

     if model.descriptor() not in supported_inference_models():
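
With the new signatures, callers pass the llama model descriptor resolved
at registration time instead of re-deriving it from request.model. A hedged
usage sketch (get_llama_model and self.formatter are assumed names, not
necessarily the PR's code):

```python
# Hedged usage sketch of calling the updated helper from a provider.
def build_prompt(self, request: ChatCompletionRequest) -> str:
    # The descriptor resolved during register_model is threaded through.
    llama_model = self.get_llama_model(request.model)
    return chat_completion_request_to_prompt(request, llama_model, self.formatter)
```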