diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index f94f2a578..567110829 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4163,70 +4163,80 @@
]
},
"arguments": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "integer"
- },
- {
- "type": "number"
- },
- {
- "type": "boolean"
- },
- {
- "type": "null"
- },
- {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "integer"
- },
- {
- "type": "number"
- },
- {
- "type": "boolean"
- },
- {
- "type": "null"
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
+ },
+ {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
+ }
+ ]
}
- ]
- }
- },
- {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "integer"
- },
- {
- "type": "number"
- },
- {
- "type": "boolean"
- },
- {
- "type": "null"
+ },
+ {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
+ }
+ ]
}
- ]
- }
+ }
+ ]
}
- ]
- }
+ }
+ ]
+ },
+ "arguments_json": {
+ "type": "string"
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 238f8dcd0..1dfd17f55 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2890,30 +2890,34 @@ components:
title: BuiltinTool
- type: string
arguments:
- type: object
- additionalProperties:
- oneOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- - type: array
- items:
- oneOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- - type: object
- additionalProperties:
- oneOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
+ oneOf:
+ - type: string
+ - type: object
+ additionalProperties:
+ oneOf:
+ - type: string
+ - type: integer
+ - type: number
+ - type: boolean
+ - type: 'null'
+ - type: array
+ items:
+ oneOf:
+ - type: string
+ - type: integer
+ - type: number
+ - type: boolean
+ - type: 'null'
+ - type: object
+ additionalProperties:
+ oneOf:
+ - type: string
+ - type: integer
+ - type: number
+ - type: boolean
+ - type: 'null'
+ arguments_json:
+ type: string
additionalProperties: false
required:
- call_id
diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py
index 106875bb2..48cb51005 100644
--- a/llama_stack/models/llama/datatypes.py
+++ b/llama_stack/models/llama/datatypes.py
@@ -38,7 +38,14 @@ RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]]
class ToolCall(BaseModel):
call_id: str
tool_name: Union[BuiltinTool, str]
- arguments: Dict[str, RecursiveType]
+ # The plan is to deprecate the Dict form in favor of a JSON string
+ # that is parsed on the client side, instead of trying to model
+ # the recursive argument type here.
+ # This is a Union for now so that clients can start preparing for the change.
+ # Eventually, both the Dict variant and the `arguments_json` field will be
+ # removed, and `arguments` will simply be a str.
+ arguments: Union[str, Dict[str, RecursiveType]]
+ arguments_json: Optional[str] = None
@field_validator("tool_name", mode="before")
@classmethod
diff --git a/llama_stack/models/llama/llama3/quantization/__init__.py b/llama_stack/models/llama/llama3/quantization/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/models/llama/llama3/quantization/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py
index 8f08b3a9e..160bb00f8 100644
--- a/llama_stack/models/llama/llama4/chat_format.py
+++ b/llama_stack/models/llama/llama4/chat_format.py
@@ -210,9 +210,12 @@ class ChatFormat:
content = ToolUtils.encode_tool_call(t, tool_prompt_format)
_process_content(content)
+ # Assistant messages carrying tool calls, and tool response messages, end with <|eom|> rather than <|eot|>
eom = False
if message.role == "assistant":
- eom = message.stop_reason == StopReason.end_of_message
+ eom = message.stop_reason == StopReason.end_of_message or message.tool_calls
+ elif message.role == "tool":
+ eom = True
tokens.append(self.tokenizer.special_tokens["<|eom|>" if eom else "<|eot|>"])
return tokens, images
@@ -247,6 +250,11 @@ class ChatFormat:
if content.startswith(header_str):
content = content[len(header_str) :]
+ ipython = content.startswith("<|python_start|>")
+ if ipython:
+ content = content[len("<|python_start|>") :]
+ content = content.replace("<|python_end|>", "")
+
if content.endswith("<|eot|>"):
content = content[: -len("<|eot|>")]
stop_reason = StopReason.end_of_turn
@@ -277,6 +285,11 @@ class ChatFormat:
}
if tool_name in BuiltinTool.__members__:
tool_name = BuiltinTool[tool_name]
+ elif ipython:
+ tool_name = BuiltinTool.code_interpreter
+ tool_arguments = {
+ "code": content,
+ }
tool_calls = []
if tool_name is not None and tool_arguments is not None:
diff --git a/llama_stack/models/llama/llama4/generation.py b/llama_stack/models/llama/llama4/generation.py
index 8971835aa..20c4e5e58 100644
--- a/llama_stack/models/llama/llama4/generation.py
+++ b/llama_stack/models/llama/llama4/generation.py
@@ -4,22 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# top-level folder for each specific model found within the models/ directory at
-# the top-level of this source tree.
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement.
-
import codecs
import io
import json
diff --git a/llama_stack/models/llama/llama4/quantization/__init__.py b/llama_stack/models/llama/llama4/quantization/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/models/llama/llama4/quantization/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py
index 14250f681..4d271e5fd 100644
--- a/llama_stack/models/llama/llama4/tokenizer.py
+++ b/llama_stack/models/llama/llama4/tokenizer.py
@@ -4,16 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# top-level folder for each specific model found within the models/ directory at
-# the top-level of this source tree.
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement.
-
import os
from logging import getLogger
from pathlib import Path
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index e2f3a7b33..6a828322f 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -96,6 +96,7 @@ def _convert_to_vllm_tool_calls_in_response(
call_id=call.id,
tool_name=call.function.name,
arguments=json.loads(call.function.arguments),
+ arguments_json=call.function.arguments,
)
for call in tool_calls
]
@@ -175,6 +176,7 @@ async def _process_vllm_chat_completion_stream_response(
call_id=tool_call_buf.call_id,
tool_name=tool_call_buf.tool_name,
arguments=args,
+ arguments_json=args_str,
),
parse_status=ToolCallParseStatus.succeeded,
),