From cfaf9e0e8b18f9ae53ed27c331bccbd52cd86ff4 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Apr 2025 11:00:48 -0700 Subject: [PATCH] revert some unintentional changes by copying source of truth to llama-models --- docs/_static/llama-stack-spec.html | 132 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 52 +++---- llama_stack/models/llama/datatypes.py | 9 +- .../llama/llama3/quantization/__init__.py | 5 + .../models/llama/llama4/chat_format.py | 15 +- llama_stack/models/llama/llama4/generation.py | 16 --- .../llama/llama4/quantization/__init__.py | 5 + llama_stack/models/llama/llama4/tokenizer.py | 10 -- .../providers/remote/inference/vllm/vllm.py | 2 + 9 files changed, 133 insertions(+), 113 deletions(-) create mode 100644 llama_stack/models/llama/llama3/quantization/__init__.py create mode 100644 llama_stack/models/llama/llama4/quantization/__init__.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index f94f2a578..567110829 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -4163,70 +4163,80 @@ ] }, "arguments": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } - }, - { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + }, + { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } + } + ] } - ] - } + } + ] + }, + "arguments_json": { + "type": "string" } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 238f8dcd0..1dfd17f55 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -2890,30 +2890,34 @@ components: title: BuiltinTool - type: string arguments: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - - type: array - items: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: array + items: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: 
number + - type: boolean + - type: 'null' + arguments_json: + type: string additionalProperties: false required: - call_id diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index 106875bb2..48cb51005 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -38,7 +38,14 @@ RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]] class ToolCall(BaseModel): call_id: str tool_name: Union[BuiltinTool, str] - arguments: Dict[str, RecursiveType] + # Plan is to deprecate the Dict in favor of a JSON string + # that is parsed on the client side instead of trying to manage + # the recursive type here. + # Making this a union so that client side can start prepping for this change. + # Eventually, we will remove both the Dict and arguments_json field, + # and arguments will just be a str + arguments: Union[str, Dict[str, RecursiveType]] + arguments_json: Optional[str] = None @field_validator("tool_name", mode="before") @classmethod diff --git a/llama_stack/models/llama/llama3/quantization/__init__.py b/llama_stack/models/llama/llama3/quantization/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/models/llama/llama3/quantization/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py index 8f08b3a9e..160bb00f8 100644 --- a/llama_stack/models/llama/llama4/chat_format.py +++ b/llama_stack/models/llama/llama4/chat_format.py @@ -210,9 +210,12 @@ class ChatFormat: content = ToolUtils.encode_tool_call(t, tool_prompt_format) _process_content(content) + # Tool calls and Tool Response messages should be eom eom = False if message.role == "assistant": - eom = message.stop_reason == StopReason.end_of_message + eom = message.stop_reason == StopReason.end_of_message or message.tool_calls + elif message.role == "tool": + eom = True tokens.append(self.tokenizer.special_tokens["<|eom|>" if eom else "<|eot|>"]) return tokens, images @@ -247,6 +250,11 @@ class ChatFormat: if content.startswith(header_str): content = content[len(header_str) :] + ipython = content.startswith("<|python_start|>") + if ipython: + content = content[len("<|python_start|>") :] + content = content.replace("<|python_end|>", "") + if content.endswith("<|eot|>"): content = content[: -len("<|eot|>")] stop_reason = StopReason.end_of_turn @@ -277,6 +285,11 @@ class ChatFormat: } if tool_name in BuiltinTool.__members__: tool_name = BuiltinTool[tool_name] + elif ipython: + tool_name = BuiltinTool.code_interpreter + tool_arguments = { + "code": content, + } tool_calls = [] if tool_name is not None and tool_arguments is not None: diff --git a/llama_stack/models/llama/llama4/generation.py b/llama_stack/models/llama/llama4/generation.py index 8971835aa..20c4e5e58 100644 --- a/llama_stack/models/llama/llama4/generation.py +++ b/llama_stack/models/llama/llama4/generation.py @@ -4,22 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# top-level folder for each specific model found within the models/ directory at -# the top-level of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. - import codecs import io import json diff --git a/llama_stack/models/llama/llama4/quantization/__init__.py b/llama_stack/models/llama/llama4/quantization/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/models/llama/llama4/quantization/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py index 14250f681..4d271e5fd 100644 --- a/llama_stack/models/llama/llama4/tokenizer.py +++ b/llama_stack/models/llama/llama4/tokenizer.py @@ -4,16 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# top-level folder for each specific model found within the models/ directory at -# the top-level of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. - import os from logging import getLogger from pathlib import Path diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index e2f3a7b33..6a828322f 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -96,6 +96,7 @@ def _convert_to_vllm_tool_calls_in_response( call_id=call.id, tool_name=call.function.name, arguments=json.loads(call.function.arguments), + arguments_json=call.function.arguments, ) for call in tool_calls ] @@ -175,6 +176,7 @@ async def _process_vllm_chat_completion_stream_response( call_id=tool_call_buf.call_id, tool_name=tool_call_buf.tool_name, arguments=args, + arguments_json=args_str, ), parse_status=ToolCallParseStatus.succeeded, ),
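
Notes (not part of the patch): below is a minimal sketch of the transitional ToolCall shape introduced in llama_stack/models/llama/datatypes.py above, plus a hypothetical client-side helper (arguments_as_dict is illustrative, not part of the patch) showing how callers can start handling both representations before the Dict form is removed. The model here mirrors the patched datatypes but drops the BuiltinTool union and the tool_name field validator for brevity.

```python
import json
from typing import Dict, List, Optional, Union

from pydantic import BaseModel

# Mirrors the aliases used in llama_stack/models/llama/datatypes.py.
Primitive = Union[str, int, float, bool, None]
RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]]


class ToolCall(BaseModel):
    call_id: str
    tool_name: str  # the real model also accepts BuiltinTool
    # Transitional shape from this patch: `arguments` may be a dict today or
    # a raw JSON string after the planned deprecation; `arguments_json`
    # carries the raw string in the interim.
    arguments: Union[str, Dict[str, RecursiveType]]
    arguments_json: Optional[str] = None


def arguments_as_dict(call: ToolCall) -> Dict[str, RecursiveType]:
    # Hypothetical client-side helper: normalize either representation
    # to a dict so downstream code is insulated from the migration.
    if isinstance(call.arguments, str):
        return json.loads(call.arguments)
    return call.arguments


raw = '{"city": "Tokyo", "days": 3}'
call = ToolCall(
    call_id="call-1",
    tool_name="get_weather",
    arguments=json.loads(raw),
    arguments_json=raw,  # providers (e.g. the vLLM adapter above) now set both
)
assert arguments_as_dict(call) == {"city": "Tokyo", "days": 3}
```

Carrying the raw JSON string alongside the parsed dict matches how the vLLM adapter hunks populate both fields directly from call.function.arguments, and it sidesteps forcing deeply nested arguments through the lossy RecursiveType schema once clients parse the string themselves.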
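The llama4/chat_format.py hunks make two related changes: assistant messages that contain tool calls, and all tool-response messages, are now terminated with <|eom|> instead of <|eot|>, and assistant content wrapped in <|python_start|>...<|python_end|> is decoded as a code_interpreter builtin tool call. A simplified, self-contained sketch of that decode branch, assuming the same tag conventions as the patch (decode_python_block and DecodedToolCall are illustrative names, not the real API):

```python
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class DecodedToolCall:
    tool_name: str
    arguments: Dict[str, str]


def decode_python_block(content: str) -> Optional[DecodedToolCall]:
    # Simplified sketch of the new branch in ChatFormat's assistant-message
    # decoding: content wrapped in <|python_start|> is treated as code for
    # the code_interpreter builtin tool.
    if not content.startswith("<|python_start|>"):
        return None
    content = content[len("<|python_start|>") :]
    content = content.replace("<|python_end|>", "")
    # Strip a trailing stop token, as the real decoder does for <|eot|>/<|eom|>.
    for stop in ("<|eot|>", "<|eom|>"):
        if content.endswith(stop):
            content = content[: -len(stop)]
    return DecodedToolCall(
        tool_name="code_interpreter", arguments={"code": content}
    )


call = decode_python_block("<|python_start|>print(2 + 2)<|python_end|><|eot|>")
assert call is not None and call.arguments["code"] == "print(2 + 2)"
```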