final fixes

Repository: https://github.com/meta-llama/llama-stack.git
Commit: d3ebc18559
Parent: 971566fd74
3 changed files with 11 additions and 15 deletions
@@ -203,14 +203,8 @@ class ChatFormat:
             tokens.extend(toks)
             images.extend(imgs)
 
-        # if message.role == "assistant" and len(message.tool_calls) > 0:
-        # tokens.append(self.tokenizer.special_tokens["<|python_start|>"])
-
         _process_content(message.content)
 
-        # if message.role == "assistant" and len(message.tool_calls) > 0:
-        # tokens.append(self.tokenizer.special_tokens["<|python_end|>"])
-
         if message.role == "user" and message.context is not None:
             # This is RAG context; why is it here in the chat format? I don't think
             # this is needed and can be moved upwards
@@ -222,6 +216,7 @@ class ChatFormat:
                 content = ToolUtils.encode_tool_call(t, tool_prompt_format)
                 _process_content(content)
 
+        # Tool calls and Tool Response messages should be eom
         eom = False
         if message.role == "assistant":
             eom = message.stop_reason == StopReason.end_of_message or message.tool_calls
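The comment added here makes the intent explicit: assistant turns that issue tool calls, like tool-response turns, should end with an end-of-message token rather than an end-of-turn token. A minimal sketch of how such an eom flag would typically select the closing special token, assuming the surrounding encode_message appends one of two special tokens whose names are not shown in this diff:

    # Sketch only: "<|eom_id|>" and "<|eot_id|>" are assumed token names.
    closing = "<|eom_id|>" if eom else "<|eot_id|>"
    tokens.append(self.tokenizer.special_tokens[closing])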
@@ -6,8 +6,11 @@
 
 import asyncio
 import logging
+import os
 from typing import AsyncGenerator, List, Optional, Union
 
+from termcolor import cprint
+
 from llama_stack.apis.common.content_types import (
     TextDelta,
     ToolCallDelta,
@@ -338,9 +341,8 @@ class MetaReferenceInferenceImpl(
         stop_reason = None
 
         for token_result in self.generator.chat_completion(request):
-            from termcolor import cprint
-
-            cprint(token_result.text, "cyan", end="")
+            if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
+                cprint(token_result.text, "cyan", end="")
 
             tokens.append(token_result.token)
 
@@ -390,9 +392,8 @@ class MetaReferenceInferenceImpl(
         ipython = False
 
         for token_result in self.generator.chat_completion(request):
-            from termcolor import cprint
-
-            cprint(token_result.text, "cyan", end="")
+            if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
+                cprint(token_result.text, "cyan", end="")
 
             tokens.append(token_result.token)
 
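The two streaming loops above used to import termcolor inline and print every generated token unconditionally; after this change the import lives at the top of the module (see the earlier import hunk) and the per-token print runs only when the LLAMA_MODELS_DEBUG environment variable is "1". A self-contained sketch of the same gating pattern, with the generator and its token fields as stand-ins rather than the real API:

    import os

    from termcolor import cprint


    def collect_tokens(results):
        # Print each token in cyan only when debug output is explicitly enabled.
        debug = os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1"
        tokens = []
        for token_result in results:
            if debug:
                cprint(token_result.text, "cyan", end="")
            tokens.append(token_result.token)
        return tokens

Running the server with LLAMA_MODELS_DEBUG=1 turns the per-token trace on; by default the loop stays silent.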
@@ -128,7 +128,7 @@
             ],
             "tool_responses": [
                 {
-                    "response": "{'resposne': '70 degrees and foggy'}"
+                    "response": "{'response': '70 degrees and foggy'}"
                 }
             ],
             "expected": [
@@ -174,7 +174,7 @@
             ],
             "tool_responses": [
                 {
-                    "response": "{'resposne': '70 degrees and foggy'}"
+                    "response": "{'response': '70 degrees and foggy'}"
                 }
             ],
             "expected": [
@@ -398,7 +398,7 @@
                     "response": "{'response': 'Total expenses for January 2025: $1000'}"
                 },
                 {
-                    "response": "{'resposne': 'Total expenses for February 2024: $2000'}"
+                    "response": "{'response': 'Total expenses for February 2024: $2000'}"
                 }
             ],
             "expected": [
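The last three hunks fix the same typo, 'resposne' → 'response', inside the stringified tool-response payloads of the test fixtures. A hypothetical check on such a payload, assuming the fixture string is parsed with ast.literal_eval (it is a Python-dict-style string, not JSON), shows why the spelling matters:

    import ast

    payload = "{'response': '70 degrees and foggy'}"
    parsed = ast.literal_eval(payload)
    assert "response" in parsed  # would fail against the old 'resposne' spelling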