chore: move all Llama Stack types from llama-models to llama-stack (#1098)
llama-models should have extremely minimal cruft. Its sole purpose should be didactic -- show the simplest implementation of the llama models and document the prompt formats, etc.

This PR is the complement to https://github.com/meta-llama/llama-models/pull/279

## Test Plan

Ensure all `llama` CLI `model` sub-commands work:

```bash
llama model list
llama model download --model-id ...
llama model prompt-format -m ...
```

Ran tests:

```bash
cd tests/client-sdk
LLAMA_STACK_CONFIG=fireworks pytest -s -v inference/
LLAMA_STACK_CONFIG=fireworks pytest -s -v vector_io/
LLAMA_STACK_CONFIG=fireworks pytest -s -v agents/
```

Created a fresh venv with `uv venv && source .venv/bin/activate`, then ran `llama stack build --template fireworks --image-type venv` followed by `llama stack run together --image-type venv` <-- the server runs.

Also checked that the OpenAPI generator can run and there is no change in the generated files as a result:

```bash
cd docs/openapi_generator
sh run_openapi_generator.sh
```
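For downstream code the user-visible effect is an import-path change. The new path below appears verbatim in the `interface.py` added by this diff; the old path is an assumption about the pre-move llama-models layout:

```python
# Old home of the shared types (assumed pre-move path in llama-models):
# from llama_models.llama3.api.datatypes import ToolDefinition

# New home after this PR (import appears verbatim in interface.py below):
from llama_stack.models.llama.datatypes import ToolDefinition
```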
This commit is contained in:
parent c0ee512980
commit 314ee09ae3

138 changed files with 8491 additions and 465 deletions
llama_stack/models/llama/llama3/interface.py (new file, +257 lines)
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# top-level folder for each specific model found within the models/ directory at
# the top-level of this source tree.

from pathlib import Path
from typing import List, Optional

from llama_models.datatypes import (
    BuiltinTool,
    RawMessage,
    StopReason,
    ToolCall,
    ToolPromptFormat,
)
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer
from termcolor import colored

from llama_stack.models.llama.datatypes import ToolDefinition

from . import template_data
from .prompt_templates import (
    BuiltinToolGenerator,
    FunctionTagCustomToolGenerator,
    JsonCustomToolGenerator,
    SystemDefaultGenerator,
    ToolResponseGenerator,
)

THIS_DIR = Path(__file__).parent


class Template:
    def __init__(
        self,
        role,
        template_name,
        data_provider=None,
        notes=None,
    ):
        self.role = role
        self.template_name = template_name
        self.data_provider = data_provider or ""
        self._notes = notes or ""

    @property
    def notes(self):
        default = "↵ represents newline"
        notes = default
        if self._notes:
            notes += "\n"
            notes += self._notes
        return notes


TEMPLATES = [
    Template(
        "user",
        "user-default",
        "user_default",
    ),
    Template(
        "user",
        "user-images",
        "user_images",
    ),
    Template("user", "user-interleaved-images", "user_interleaved_images"),
    Template(
        "assistant",
        "assistant-builtin-tool-call",
        "assistant_builtin_tool_call",
        "Notice <|python_tag|>",
    ),
    Template(
        "assistant",
        "assistant-custom-tool-call",
        "assistant_custom_tool_call",
        "Notice <function=...> format",
    ),
    Template(
        "assistant",
        "assistant-default",
        "assistant_default",
    ),
    Template(
        "system",
        "system-builtin-and-custom-tools",
        "system_message_builtin_and_custom_tools",
    ),
    Template(
        "system",
        "system-builtin-tools-only",
        "system_message_builtin_tools_only",
    ),
    Template(
        "system",
        "system-custom-tools-only",
        "system_message_custom_tools_only",
    ),
    Template(
        "system",
        "system-default",
        "system_default",
    ),
    Template(
        "tool",
        "tool-success",
        "tool_success",
        "Note ipython header and [stdout]",
    ),
    Template(
        "tool",
        "tool-failure",
        "tool_failure",
        "Note ipython header and [stderr]",
    ),
]


class LLama31Interface:
    def __init__(self, tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json):
        self.tokenizer = Tokenizer.get_instance()
        self.formatter = ChatFormat(self.tokenizer)
        self.tool_prompt_format = tool_prompt_format

    def get_tokens(self, messages: List[RawMessage]) -> List[int]:
        model_input = self.formatter.encode_dialog_prompt(
            messages,
            self.tool_prompt_format,
        )
        return model_input.tokens

    def tool_response_messages(self, *args, **kwargs):
        template = ToolResponseGenerator().gen(*args, **kwargs)
        return [
            RawMessage(
                role="tool",
                content=template.render(),
            )
        ]

    def system_messages(
        self,
        builtin_tools: List[BuiltinTool],
        custom_tools: List[ToolDefinition],
        instruction: Optional[str] = None,
    ) -> List[RawMessage]:
        messages = []

        default_gen = SystemDefaultGenerator()
        default_template = default_gen.gen()

        sys_content = ""

        tool_template = None
        if builtin_tools or custom_tools:
            tool_gen = BuiltinToolGenerator()
            tool_template = tool_gen.gen(builtin_tools + custom_tools)

            sys_content += tool_template.render()
            sys_content += "\n"

        sys_content += default_template.render()

        if instruction:
            sys_content += "\n\n"
            sys_content += instruction

        sys_content += "\n"
        messages.append(RawMessage(role="system", content=sys_content))

        if custom_tools:
            if self.tool_prompt_format == ToolPromptFormat.json:
                tool_gen = JsonCustomToolGenerator()
            elif self.tool_prompt_format == ToolPromptFormat.function_tag:
                tool_gen = FunctionTagCustomToolGenerator()
            else:
                raise ValueError(f"Unsupported ToolPromptFormat {self.tool_prompt_format}")

            custom_template = tool_gen.gen(custom_tools)
            messages.append(RawMessage(role="user", content=custom_template.render()))

        return messages

    def assistant_response_messages(
        self,
        content: str,
        stop_reason: StopReason,
        tool_call: Optional[ToolCall] = None,
    ) -> List[RawMessage]:
        tool_calls = []
        if tool_call:
            tool_calls.append(tool_call)
        return [
            RawMessage(
                role="assistant",
                content=content,
                tool_calls=tool_calls,
                stop_reason=stop_reason,
            )
        ]

    def user_message(self, content: str) -> List[RawMessage]:
        return [RawMessage(role="user", content=content)]

    def display_message_as_tokens(self, message: RawMessage) -> None:
        """Util to print tokenized string to shell"""
        tokens = self.formatter.encode_message(message, self.tool_prompt_format)
        on_colors = [
            "on_red",
            "on_green",
            "on_yellow",
            "on_blue",
            "on_magenta",
            "on_cyan",
        ]
        for i, t in enumerate(tokens):
            on_col = on_colors[i % len(on_colors)]
            print(colored(self.tokenizer.decode([t]), "white", on_col), end="")
        print("\n", end="")


def list_jinja_templates() -> List[Template]:
    return TEMPLATES


def render_jinja_template(name: str, tool_prompt_format: ToolPromptFormat):
    by_name = {t.template_name: t for t in TEMPLATES}
    if name not in by_name:
        raise ValueError(f"No template found for `{name}`")

    template = by_name[name]
    interface = LLama31Interface(tool_prompt_format)

    data_func = getattr(template_data, template.data_provider)
    if template.role == "system":
        messages = interface.system_messages(**data_func())
    elif template.role == "tool":
        messages = interface.tool_response_messages(**data_func())
    elif template.role == "assistant":
        messages = interface.assistant_response_messages(**data_func())
    elif template.role == "user":
        messages = interface.user_message(**data_func())

    tokens = interface.get_tokens(messages)
    special_tokens = list(interface.tokenizer.special_tokens.values())
    tokens = [(interface.tokenizer.decode([t]), t in special_tokens) for t in tokens]
    return template, tokens
```
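`list_jinja_templates` and `render_jinja_template` are the module-level entry points: the former enumerates the `TEMPLATES` table, the latter renders one template by name and returns the decoded tokens paired with a special-token flag. A minimal usage sketch, assuming llama-stack and its llama_models dependency are installed so the imports below resolve:

```python
from llama_models.datatypes import ToolPromptFormat

from llama_stack.models.llama.llama3.interface import (
    list_jinja_templates,
    render_jinja_template,
)

# Enumerate the bundled example templates and their roles.
for template in list_jinja_templates():
    print(f"{template.role:<10} {template.template_name}")

# Render one template; tokens come back as (decoded_text, is_special) pairs.
template, tokens = render_jinja_template(
    "assistant-builtin-tool-call", ToolPromptFormat.json
)
print(template.notes)
for text, is_special in tokens:
    print(f"{'[S]' if is_special else '   '} {text!r}")
```

This is presumably the same code path the `llama model prompt-format` sub-command from the Test Plan above exercises.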