Convert TGI to work with openai_compat

This commit is contained in:
Ashwin Bharambe 2024-10-08 12:57:34 -07:00 committed by Ashwin Bharambe
parent 05e73d12b3
commit ed899a5dec
6 changed files with 133 additions and 338 deletions

View file

@ -3,8 +3,11 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Tuple
from llama_models.llama3.api.chat_format import ChatFormat
from termcolor import cprint
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from llama_models.datatypes import ModelFamily
@ -28,6 +31,17 @@ def chat_completion_request_to_prompt(
return formatter.tokenizer.decode(model_input.tokens)
def chat_completion_request_to_model_input_info(
    request: ChatCompletionRequest, formatter: ChatFormat
) -> Tuple[str, int]:
    """Render a chat completion request as prompt text and count its tokens.

    The request is first passed through ``augment_messages_for_tools`` and
    the resulting messages are encoded with
    ``formatter.encode_dialog_prompt``.

    Returns:
        A ``(prompt, token_count)`` pair: the encoded tokens decoded back to
        text by the formatter's tokenizer, and the number of encoded tokens.
    """
    dialog = augment_messages_for_tools(request)
    tokens = formatter.encode_dialog_prompt(dialog).tokens
    prompt = formatter.tokenizer.decode(tokens)
    return prompt, len(tokens)
def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
"""Reads chat completion request and augments the messages to handle tools.
For eg. for llama_3_1, add system message with the appropriate tools or

View file

@ -60,6 +60,8 @@ def process_chat_completion_response(
if reason := choice.finish_reason:
if reason in ["stop", "eos"]:
stop_reason = StopReason.end_of_turn
elif reason == "eom":
stop_reason = StopReason.end_of_message
elif reason == "length":
stop_reason = StopReason.out_of_tokens
@ -96,7 +98,7 @@ async def process_chat_completion_stream_response(
finish_reason = choice.finish_reason
if finish_reason:
if stop_reason is None and finish_reason in ["stop", "eos"]:
if stop_reason is None and finish_reason in ["stop", "eos", "eos_token"]:
stop_reason = StopReason.end_of_turn
elif stop_reason is None and finish_reason == "length":
stop_reason = StopReason.out_of_tokens
@ -118,16 +120,16 @@ async def process_chat_completion_stream_response(
buffer += text
continue
if ipython:
if text == "<|eot_id|>":
stop_reason = StopReason.end_of_turn
text = ""
continue
elif text == "<|eom_id|>":
stop_reason = StopReason.end_of_message
text = ""
continue
if text == "<|eot_id|>":
stop_reason = StopReason.end_of_turn
text = ""
continue
elif text == "<|eom_id|>":
stop_reason = StopReason.end_of_message
text = ""
continue
if ipython:
buffer += text
delta = ToolCallDelta(
content=text,