mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-09 11:20:58 +00:00
Convert TGI to work with openai_compat
This commit is contained in:
parent
05e73d12b3
commit
ed899a5dec
6 changed files with 133 additions and 338 deletions
|
|
@ -3,8 +3,11 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Tuple
|
||||
|
||||
from llama_models.llama3.api.chat_format import ChatFormat
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_models.datatypes import ModelFamily
|
||||
|
|
@ -28,6 +31,17 @@ def chat_completion_request_to_prompt(
|
|||
return formatter.tokenizer.decode(model_input.tokens)
|
||||
|
||||
|
||||
def chat_completion_request_to_model_input_info(
    request: ChatCompletionRequest, formatter: ChatFormat
) -> Tuple[str, int]:
    """Render a chat completion request as prompt text and count its tokens.

    The request is first passed through ``augment_messages_for_tools`` and the
    resulting messages are encoded with ``formatter.encode_dialog_prompt``.

    Args:
        request: The chat completion request to convert.
        formatter: Chat formatter used to encode the dialog; its tokenizer is
            used to decode the encoded tokens back into text.

    Returns:
        A ``(prompt, num_tokens)`` tuple, where ``prompt`` is the decoded text
        of the encoded dialog and ``num_tokens`` is the number of encoded
        tokens.
    """
    tokens = formatter.encode_dialog_prompt(
        augment_messages_for_tools(request)
    ).tokens
    prompt = formatter.tokenizer.decode(tokens)
    return prompt, len(tokens)
|
||||
|
||||
|
||||
def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
|
||||
"""Reads chat completion request and augments the messages to handle tools.
|
||||
For eg. for llama_3_1, add system message with the appropriate tools or
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue