Merge branch 'main' into add-watsonx-inference-adapter

Sajikumar JS 2025-04-07 10:42:30 +05:30
commit 5366c423ae
10 changed files with 1355 additions and 26 deletions


@@ -279,6 +279,10 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801
{% endif -%}
{%- endfor %}
]
You can answer general questions or invoke tools when necessary.
In addition to tool calls, you should also augment your responses by using the tool outputs.
"""
)
return PromptTemplate(
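The hunk above appends two sentences to the system prompt that `PythonListCustomToolGenerator` renders for the python-list tool format, telling the model it may answer directly, call tools, or fold tool outputs back into its replies. A minimal standalone sketch of how a Jinja fragment like this renders (the cut-down template and the tool names are illustrative, not the project's actual template):

```python
# Illustrative only: a trimmed Jinja fragment in the spirit of the template
# above, showing where the two new guidance sentences land in the rendered prompt.
from jinja2 import Template

fragment = Template(
    "[\n"
    "{% for t in custom_tools %}"
    "    {{ t }},\n"
    "{% endfor %}"
    "]\n"
    "\n"
    "You can answer general questions or invoke tools when necessary.\n"
    "In addition to tool calls, you should also augment your responses "
    "by using the tool outputs.\n"
)

# Hypothetical tool names; the real prompt is built from tool definitions.
print(fragment.render(custom_tools=["get_weather", "get_time"]))
```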


@@ -216,9 +216,12 @@ class ChatFormat:
                content = ToolUtils.encode_tool_call(t, tool_prompt_format)
                _process_content(content)
        # Tool calls and Tool Response messages should be eom
        eom = False
        if message.role == "assistant":
            eom = message.stop_reason == StopReason.end_of_message
            eom = message.stop_reason == StopReason.end_of_message or message.tool_calls
        elif message.role == "tool":
            eom = True
        tokens.append(self.tokenizer.special_tokens["<|eom|>" if eom else "<|eot|>"])
        return tokens, images
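In this hunk the plain `end_of_message` check is the old assignment and the version that also ORs in `message.tool_calls` (together with the new `tool` branch and comment) is its replacement: assistant turns that emit tool calls, and every tool-response turn, now close with `<|eom|>` instead of `<|eot|>`. A self-contained sketch of that stop-token selection, using a stand-in enum and made-up token ids rather than llama_stack's real types:

```python
# Standalone sketch of the stop-token choice above. StopReason and the token
# ids are stand-ins; the real values live in llama_stack / the Llama 3 tokenizer.
from enum import Enum


class StopReason(Enum):
    end_of_turn = "end_of_turn"
    end_of_message = "end_of_message"


SPECIAL_TOKENS = {"<|eom|>": 128008, "<|eot|>": 128009}  # illustrative ids


def stop_token(role: str, stop_reason: StopReason, has_tool_calls: bool) -> int:
    # Mirrors the diff: assistant turns that stop at end_of_message or that
    # carry tool calls, and every tool-response turn, end with <|eom|>.
    eom = False
    if role == "assistant":
        eom = stop_reason == StopReason.end_of_message or has_tool_calls
    elif role == "tool":
        eom = True
    return SPECIAL_TOKENS["<|eom|>" if eom else "<|eot|>"]


assert stop_token("assistant", StopReason.end_of_turn, has_tool_calls=True) == 128008
assert stop_token("assistant", StopReason.end_of_turn, has_tool_calls=False) == 128009
assert stop_token("tool", StopReason.end_of_turn, has_tool_calls=False) == 128008
```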


@@ -6,8 +6,11 @@
import asyncio
import logging
import os
from typing import AsyncGenerator, List, Optional, Union
from termcolor import cprint
from llama_stack.apis.common.content_types import (
    TextDelta,
    ToolCallDelta,
@@ -338,6 +341,9 @@ class MetaReferenceInferenceImpl(
            stop_reason = None
            for token_result in self.generator.chat_completion(request):
                if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
                    cprint(token_result.text, "cyan", end="")
                tokens.append(token_result.token)
                if token_result.token == tokenizer.eot_id:
@@ -386,6 +392,9 @@ class MetaReferenceInferenceImpl(
            ipython = False
            for token_result in self.generator.chat_completion(request):
                if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
                    cprint(token_result.text, "cyan", end="")
                tokens.append(token_result.token)
                if not ipython and token_result.text.startswith("<|python_tag|>"):
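Both hunks above add the same debug hook: when the LLAMA_MODELS_DEBUG environment variable is set to "1", the text of every generated token is echoed in cyan with termcolor's cprint; otherwise generation stays silent. A minimal sketch of that pattern in isolation (the hard-coded token texts stand in for the results of self.generator.chat_completion(request)):

```python
# Standalone sketch of the env-gated debug printing added above. The fake token
# texts below are illustrative; in the diff they come from the generator loop.
import os

from termcolor import cprint


def maybe_debug_print(text: str) -> None:
    # Only echo token text when LLAMA_MODELS_DEBUG=1, so normal runs stay quiet.
    if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
        cprint(text, "cyan", end="")


for text in ["Hello", ",", " world", "!"]:
    maybe_debug_print(text)
```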