diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index dbd530aa3..c50554092 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4347,24 +4347,6 @@
"type": "string",
"description": "Unique identifier for the tool call this response is for"
},
- "tool_name": {
- "oneOf": [
- {
- "type": "string",
- "enum": [
- "brave_search",
- "wolfram_alpha",
- "photogen",
- "code_interpreter"
- ],
- "title": "BuiltinTool"
- },
- {
- "type": "string"
- }
- ],
- "description": "Name of the tool that was called"
- },
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The response content from the tool"
@@ -4374,7 +4356,6 @@
"required": [
"role",
"call_id",
- "tool_name",
"content"
],
"title": "ToolResponseMessage",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index cca1872a4..1f9536c2e 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2943,17 +2943,6 @@ components:
type: string
description: >-
Unique identifier for the tool call this response is for
- tool_name:
- oneOf:
- - type: string
- enum:
- - brave_search
- - wolfram_alpha
- - photogen
- - code_interpreter
- title: BuiltinTool
- - type: string
- description: Name of the tool that was called
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
@@ -2961,7 +2950,6 @@ components:
required:
- role
- call_id
- - tool_name
- content
title: ToolResponseMessage
description: >-
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index fa917ac22..0a4324cdf 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -117,13 +117,11 @@ class ToolResponseMessage(BaseModel):
:param role: Must be "tool" to identify this as a tool response
:param call_id: Unique identifier for the tool call this response is for
- :param tool_name: Name of the tool that was called
:param content: The response content from the tool
"""
role: Literal["tool"] = "tool"
call_id: str
- tool_name: Union[BuiltinTool, str]
content: InterleavedContent
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 3f09cacc0..0ae1996cc 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -153,7 +153,6 @@ class ChatAgent(ShieldRunnerMixin):
messages.append(
ToolResponseMessage(
call_id=response.call_id,
- tool_name=response.tool_name,
content=response.content,
)
)
@@ -221,8 +220,7 @@ class ChatAgent(ShieldRunnerMixin):
messages = await self.get_messages_from_turns(turns)
if is_resume:
tool_response_messages = [
- ToolResponseMessage(call_id=x.call_id, tool_name=x.tool_name, content=x.content)
- for x in request.tool_responses
+ ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
]
messages.extend(tool_response_messages)
last_turn = turns[-1]
@@ -685,7 +683,6 @@ class ChatAgent(ShieldRunnerMixin):
result_messages = [
ToolResponseMessage(
call_id=tool_call.call_id,
- tool_name=tool_call.tool_name,
content=tool_result.content,
)
]
@@ -705,7 +702,7 @@ class ChatAgent(ShieldRunnerMixin):
tool_responses=[
ToolResponse(
call_id=result_message.call_id,
- tool_name=result_message.tool_name,
+ tool_name=tool_call.tool_name,
content=result_message.content,
metadata=tool_result.metadata,
)
@@ -999,7 +996,6 @@ async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessa
return ToolResponseMessage(
call_id="",
- tool_name=BuiltinTool.code_interpreter,
content=content,
)
diff --git a/tests/integration/fixtures/recorded_responses/chat_completion.json b/tests/integration/fixtures/recorded_responses/chat_completion.json
index 30b7e0b4d..8694cc271 100644
--- a/tests/integration/fixtures/recorded_responses/chat_completion.json
+++ b/tests/integration/fixtures/recorded_responses/chat_completion.json
@@ -12758,6 +12758,292 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. \", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"false\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " boiling point of polyjuice is -100 degrees Fahrenheit.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 139
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 23
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 162
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. \", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "{\"",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "type\": \"function\", \"name\": \"get_boiling_point\", \"",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "parameters\": {\"liquid_name\": \"polyjuice\", \"",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "celcius\": \"false\"}}",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "celcius": "false",
+ "liquid_name": "polyjuice"
+ },
+ "call_id": "1ef7adda-5ebb-41d5-a2c6-3e6700de5f81",
+ "tool_name": "get_boiling_point"
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 91
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 45
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 136
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. \", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": {
"chunks": [
{
@@ -12806,7 +13092,7 @@
"data": {
"event": {
"delta": {
- "text": " \"type\": \"function\",\n \"",
+ "text": " \"type\": \"function\",\n \"name\": \"get",
"type": "text"
},
"event_type": {
@@ -12826,7 +13112,7 @@
"data": {
"event": {
"delta": {
- "text": "name\": \"get_boiling_point\",\n \"parameters\":",
+ "text": "_boiling_point\",\n \"parameters\": {\n \"liquid",
"type": "text"
},
"event_type": {
@@ -12846,7 +13132,7 @@
"data": {
"event": {
"delta": {
- "text": " {\n \"liquid_name\": \"polyjuice\",\n \"celci",
+ "text": "_name\": \"polyjuice\",\n \"celci",
"type": "text"
},
"event_type": {
@@ -12896,7 +13182,7 @@
"celcius": "true",
"liquid_name": "polyjuice"
},
- "call_id": "73212def-09c0-4a29-845e-149afb38fcd1",
+ "call_id": "40293d5b-8a76-4df5-8325-d6e8755ba513",
"tool_name": "get_boiling_point"
},
"type": "tool_call"
@@ -13084,6 +13370,111 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " boiling point of polyjuice is -100\u00b0C.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 85
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 22
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 107
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point_with_metadata\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point_with_metadata\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}]": {
"chunks": [
{
@@ -13209,6 +13600,131 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point_with_metadata\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " boiling point of polyjuice is -",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "100 degrees Celcius.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 87
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 25
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 112
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": {
"chunks": [
{
@@ -13267,7 +13783,7 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": "{\"type\": \"function\", \"name",
+ "tool_call": "{\"type\": \"function\", \"",
"type": "tool_call"
},
"event_type": {
@@ -13292,7 +13808,7 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": "\": \"get_boiling_point\", \"parameters\": {\"liquid_name\": \"poly",
+ "tool_call": "name\": \"get_boiling_point\", \"parameters",
"type": "tool_call"
},
"event_type": {
@@ -13317,7 +13833,32 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": "juice\", \"celcius\": \"true\"}}",
+ "tool_call": "\": {\"liquid_name\": \"polyjuice\", \"celci",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "us\": \"true\"}}",
"type": "tool_call"
},
"event_type": {
@@ -13347,7 +13888,7 @@
"celcius": "true",
"liquid_name": "polyjuice"
},
- "call_id": "e4b0121a-7b75-4e89-be40-d13021a3bb11",
+ "call_id": "f146d04b-c400-4193-a6d8-ccfea7f7b529",
"tool_name": "get_boiling_point"
},
"type": "tool_call"
@@ -13468,7 +14009,7 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point",
+ "tool_call": "{\"type\": \"function\", \"name\": \"",
"type": "tool_call"
},
"event_type": {
@@ -13493,7 +14034,7 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": "_with_metadata\", \"parameters\": {\"liquid_name\": \"polyjuice\",",
+ "tool_call": "get_boiling_point_with_metadata\", \"parameters\": {\"liquid",
"type": "tool_call"
},
"event_type": {
@@ -13518,7 +14059,57 @@
"__module__": "llama_stack.apis.common.content_types",
"value": "in_progress"
},
- "tool_call": " \"celcius\": \"true\"}}",
+ "tool_call": "_name\": \"polyjuice\", \"celci",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "us\": \"",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "true\"}}",
"type": "tool_call"
},
"event_type": {
@@ -13548,7 +14139,7 @@
"celcius": "true",
"liquid_name": "polyjuice"
},
- "call_id": "11c04896-2b7b-49bd-b832-47a1c9f3796f",
+ "call_id": "d6b8a25d-9b4c-4650-bbe6-f94b5fa97e56",
"tool_name": "get_boiling_point_with_metadata"
},
"type": "tool_call"
@@ -13659,7 +14250,7 @@
"data": {
"event": {
"delta": {
- "text": " customer smiled and said \"hello\" to the",
+ "text": " customer smiled and said \"hello\" to the friendly store",
"type": "text"
},
"event_type": {
@@ -13679,7 +14270,7 @@
"data": {
"event": {
"delta": {
- "text": " friendly store clerk.",
+ "text": " clerk.",
"type": "text"
},
"event_type": {
@@ -16564,6 +17155,1609 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\ndf = pd.read_csv(\\\"\")\\nprint(df.head())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nYear Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec\\n0 2014 1.6 1.6 1.7 1.8 2.0 1.9 1.9 1.7 1.7 1.8 1.7 1.6\\n1 2015 1.6 1.7 1.8 1.8 1.7 1.8 1.8 1.8 1.9 1.9 2.0 2.1\\n2 2016 2.2 2.3 2.2 2.1 2.2 2.2 2.2 2.3 2.2 2.1 2.1 2.2\\n3 2017 2.3 2.2 2.0 1.9 1.7 1.7 1.7 1.7 1.7 1.8 1.7 1.8\\n4 2018 1.8 1.8 2.1 2.1 2.2 2.3 2.4 2.2 2.2 2.1 2.2 2.2\\n[/stdout]\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " csv file contains a table with 12 columns (Jan to",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " Dec) and 5 rows (2014 to",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " 2018). The values in the table represent the inflation rate",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " for each month of the year from 2014",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " to 2018.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 469
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 61
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 530
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 142, in \\n line 23, in \\n from .code_execution import CodeExecutionContext, CodeExecutionRequest, CodeExecutor\\nImportError: attempted relative import with no known parent package\\n[/stderr]\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 142, in \\n line 23, in \\n from .code_execution import CodeExecutionContext, CodeExecutionRequest, CodeExecutor\\nImportError: attempted relative import with no known parent package\\n[/stderr]\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " error message indicates that there is an issue with",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " the import statement. However, the code provided does not contain any",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " import statements that would cause this error.\n\nTo provide a more accurate",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " answer, I would need to know the contents of the CSV file",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " or more information about the error message.\n\nHowever, based on the",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " code provided, it seems like the code is trying to load a",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " CSV file and print some basic information about it. If the file",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " is not found or there is an issue with the file path,",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " this could cause an error.\n\nHere is a",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " revised version of the code that includes some error",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " handling:\n\n```\nimport pandas as pd\nimport code_interpreter",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "\n\ntry:\n # Load the CSV file",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "\n df = pd.read_csv(\"/var/folders/cz",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "/vyh7y1d11xg",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "881lsxsshnc5",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "c0000gn/T/tmpflpgiagc/",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "8S20Zj2Oinflation.csv\")\n\n ",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " # Print the first few rows of the dataframe\n print(df.head",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "())\n\n # Print the data types of each column\n print",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "(df.dtypes)\n\n # Print the",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " summary statistics of the dataframe\n ",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " print(df.describe())\n\nexcept FileNotFoundError:\n print(\"The file",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " was not found.\")\nexcept pd.errors.EmptyDataError",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": ":\n print(\"The file is empty.\")\nexcept pd.errors.ParserError",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": ":\n print(\"An error occurred while parsing the",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " file.\")\nexcept Exception as e:\n print",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "(\"An error occurred: \", str(e))\n``",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "`\n\nThis code will catch specific exceptions that could occur when loading the",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " CSV file and print a more",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " informative error message.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 393
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 331
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 724
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 142, in \\n line 23, in \\n from .code_execution import CodeExecutionContext, CodeExecutionRequest, CodeExecutor\\nImportError: attempted relative import with no known parent package\\n[/stderr]\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "started"
+ },
+ "tool_call": "",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "import pandas as pd\nimport code_interpreter\n\n",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "# Load the CSV file\ndf = pd.read_csv(\"/var/f",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "olders/cz/vyh7y1d11xg881lsx",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "sshnc5c0000gn/T/tmpfl",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "pgiagc/8S20Zj2Oinflation",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": ".csv\")\n\n# Print the first few rows of the",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " dataframe\nprint(df.head())\n\n# Print the data types of each",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " column\nprint(df.dtypes)\n\n#",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " Print the summary statistics of the dataframe\nprint(df.describe())",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "code": "import pandas as pd\nimport code_interpreter\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpflpgiagc/8S20Zj2Oinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print the summary statistics of the dataframe\nprint(df.describe())"
+ },
+ "call_id": "e999a578-cbd8-4bb8-bc53-deb2fff1ffce",
+ "tool_name": {
+ "__enum__": "BuiltinTool",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "code_interpreter"
+ }
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 215
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 10
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 225
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "started"
+ },
+ "tool_call": "",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "import pandas as pd\nimport code_interpreter\n\n# Load the",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " CSV file\ndf = pd.read",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "_csv(\"/var/folders/cz/vyh",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "7y1d11xg881lsxsshnc5c",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "0000gn/T/tmpflpgiagc/8S",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "20Zj2Oinflation.csv\")\n\n# Print the first",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " few rows of the dataframe\nprint(df.head())\n\n#",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " Print the data types of each column\nprint",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "(df.dtypes)\n\n# Print the summary statistics of the dataframe",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "\nprint(df.describe())",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "code": "import pandas as pd\nimport code_interpreter\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpflpgiagc/8S20Zj2Oinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print the summary statistics of the dataframe\nprint(df.describe())"
+ },
+ "call_id": "ea72d524-2d0f-4220-a898-4c295315235e",
+ "tool_name": {
+ "__enum__": "BuiltinTool",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "code_interpreter"
+ }
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 37
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 10
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 47
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. 
If the file is in your current directory, you can use the following code:\\n\\n```python\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n# Print the first 5 rows of the dataframe\\nprint(df.head())\\n# Print the summary of the dataframe\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will print the first 5 rows of the dataframe, the summary of the dataframe (including the index dtype and column count), and the description of the dataframe (including count, mean, std, min, 25%, 50%, 75%, max for each column).\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, 
\"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
"chunks": [
{
@@ -19488,6 +21682,1269 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nNumber of rows and columns in the data: (10, 13)\\nColumns of the data are: 13\\nColumns of the data are: Index(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',\\n 'Oct', 'Nov', 'Dec'],\\n dtype='object')\\nDatatype of the columns are: Year int64\\nJan float64\\nFeb float64\\nMar float64\\nApr float64\\nMay float64\\nJun float64\\nJul float64\\nAug float64\\nSep float64\\nOct float64\\nNov float64\\nDec float64\\ndtype: object\\n[/stdout]\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"This CSV file contains 10 rows and 13 columns. The columns are named 'Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'. The data types of these columns are int64 for 'Year' and float64 for the rest.\\n\\nIt appears that this CSV file contains monthly inflation rates for different years. 
The 'Year' column represents the year, and the rest of the columns represent the inflation rates for each month of the year.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n\\n# Calculate average yearly inflation\\ndf['Average'] = df[['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']].mean(axis=1)\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df['Year'], df['Average'])\\nplt.xlabel('Year')\\nplt.ylabel('Average Yearly Inflation')\\nplt.title('Average Yearly Inflation Over Time')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "This",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " code will create a line plot of the average yearly inflation over time. The",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " x-axis represents the year, and the y-axis represents the average yearly inflation",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": ". The plot will show the trend of average yearly inflation over the years",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": ".",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 635
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 56
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 691
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nNumber of rows and columns in the data: (10, 13)\\nColumns of the data are: 13\\nColumns of the data are: Index(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',\\n 'Oct', 'Nov', 'Dec'],\\n dtype='object')\\nDatatype of the columns are: Year int64\\nJan float64\\nFeb float64\\nMar float64\\nApr float64\\nMay float64\\nJun float64\\nJul float64\\nAug float64\\nSep float64\\nOct float64\\nNov float64\\nDec float64\\ndtype: object\\n[/stdout]\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"This CSV file contains 10 rows and 13 columns. The columns are named 'Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'. The data types of these columns are int64 for 'Year' and float64 for the rest.\\n\\nIt appears that this CSV file contains monthly inflation rates for different years. 
The 'Year' column represents the year, and the rest of the columns represent the inflation rates for each month of the year.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "started"
+ },
+ "tool_call": "",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "import pandas as pd\nimport matplotlib.pyplot as",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " plt\n\n# Load data\ndf = pd.read_csv(\"/var/f",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "olders/cz/vyh7y1d11xg881lsx",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "sshnc5c0000gn/T/tmpflpgiagc/",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "2VkeqrPlinflation.csv\")\n\n# Calculate average yearly inflation",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "\ndf['Average'] = df[['Jan', 'Feb', '",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "Mar', 'Apr', 'May', 'Jun', 'Jul',",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " 'Aug', 'Sep', 'Oct', 'Nov', 'Dec",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "']].mean(axis=1)\n\n# Plot time series\nplt.figure(figsize",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "=(10,6))\nplt.plot(df['Year'], df['Average",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "'])\nplt.xlabel('Year')\nplt.ylabel('Average Yearly",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " Inflation')\nplt.title('Average Yearly Inflation Over",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " Time')\nplt.grid(True)\nplt.show()",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpflpgiagc/2VkeqrPlinflation.csv\")\n\n# Calculate average yearly inflation\ndf['Average'] = df[['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']].mean(axis=1)\n\n# Plot time series\nplt.figure(figsize=(10,6))\nplt.plot(df['Year'], df['Average'])\nplt.xlabel('Year')\nplt.ylabel('Average Yearly Inflation')\nplt.title('Average Yearly Inflation Over Time')\nplt.grid(True)\nplt.show()"
+ },
+ "call_id": "f82fa3fd-e3be-4cb7-9298-8b4625cf709e",
+ "tool_name": {
+ "__enum__": "BuiltinTool",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "code_interpreter"
+ }
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 454
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 10
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 464
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nNumber of rows and columns in the data: (10, 13)\\nColumns of the data are: 13\\nColumns of the data are: Index(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',\\n 'Oct', 'Nov', 'Dec'],\\n dtype='object')\\nDatatype of the columns are: Year int64\\nJan float64\\nFeb float64\\nMar float64\\nApr float64\\nMay float64\\nJun float64\\nJul float64\\nAug float64\\nSep float64\\nOct float64\\nNov float64\\nDec float64\\ndtype: object\\n[/stdout]\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"This CSV file contains 10 rows and 13 columns. The columns are named 'Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'. The data types of these columns are int64 for 'Year' and float64 for the rest.\\n\\nThe 'Year' column likely contains the year for which the inflation rates are given. 
The other columns ('Jan' to 'Dec') likely contain the inflation rates for each month of the year.\\n\\nPlease note that the actual data in the CSV file is not provided, so the above description is based on the structure of the file.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n\\n# Calculate average yearly inflation\\ndf['Average'] = df[['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']].mean(axis=1)\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df['Year'], df['Average'])\\nplt.xlabel('Year')\\nplt.ylabel('Average Yearly Inflation')\\nplt.title('Average Yearly Inflation Over Time')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "This",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " code will create a line plot of the average yearly inflation over",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " time. The x-axis represents the year and the y-axis",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " represents the average yearly inflation. The plot will show the trend",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " of average yearly inflation over the years.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 661
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 55
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 716
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nNumber of rows and columns in the data: (10, 13)\\nColumns of the data are: 13\\nColumns of the data are: Index(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',\\n 'Oct', 'Nov', 'Dec'],\\n dtype='object')\\nDatatype of the columns are: Year int64\\nJan float64\\nFeb float64\\nMar float64\\nApr float64\\nMay float64\\nJun float64\\nJul float64\\nAug float64\\nSep float64\\nOct float64\\nNov float64\\nDec float64\\ndtype: object\\n[/stdout]\", \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"This CSV file contains 10 rows and 13 columns. The columns are named 'Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'. The data types of these columns are int64 for 'Year' and float64 for the rest.\\n\\nThe 'Year' column likely contains the year for which the inflation rates are given. 
The other columns ('Jan' to 'Dec') likely contain the inflation rates for each month of the year.\\n\\nPlease note that the actual data in the CSV file is not provided, so the above description is based on the structure of the file.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "started"
+ },
+ "tool_call": "",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\n",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "df = pd.read_csv(\"/var/folders/cz/vyh7",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "y1d11xg881lsxsshnc5c0000",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "gn/T/tmpfsp7c9_g/Aih5TPOuin",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "flation.csv\")\n\n# Calculate average yearly inflation",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "\ndf['Average'] = df[['Jan', 'Feb',",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " 'Mar', 'Apr', 'May', 'Jun', '",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "Jul', 'Aug', 'Sep', 'Oct', 'Nov",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "', 'Dec']].mean(axis=1)\n\n# Plot time series",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "\nplt.figure(figsize=(10,6))\nplt.plot(df['",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "Year'], df['Average'])\nplt.xlabel('Year')\nplt.ylabel",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "('Average Yearly Inflation')\nplt.title('",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "Average Yearly Inflation Over Time')\nplt.grid(True)\nplt",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": ".show()",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpfsp7c9_g/Aih5TPOuinflation.csv\")\n\n# Calculate average yearly inflation\ndf['Average'] = df[['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']].mean(axis=1)\n\n# Plot time series\nplt.figure(figsize=(10,6))\nplt.plot(df['Year'], df['Average'])\nplt.xlabel('Year')\nplt.ylabel('Average Yearly Inflation')\nplt.title('Average Yearly Inflation Over Time')\nplt.grid(True)\nplt.show()"
+ },
+ "call_id": "dce1b106-06e1-4163-ae85-f9a2491f4375",
+ "tool_name": {
+ "__enum__": "BuiltinTool",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "code_interpreter"
+ }
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 480
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 10
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 490
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
"chunks": [
{
@@ -20531,6 +23988,645 @@
],
"type": "generator"
},
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stdout]\\nNumber of rows and columns in the data: (10, 13)\\nColumns of the data are: 13\\nColumns of the data are: Index(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',\\n 'Oct', 'Nov', 'Dec'],\\n dtype='object')\\nDatatype of the columns are: Year int64\\nJan float64\\nFeb float64\\nMar float64\\nApr float64\\nMay float64\\nJun float64\\nJul float64\\nAug float64\\nSep float64\\nOct float64\\nNov float64\\nDec float64\\ndtype: object\\n[/stdout]\", \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "This",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " CSV file contains 10 rows and 13 columns. The columns are",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " named 'Year', 'Jan', 'Feb', 'Mar', '",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "Apr', 'May', 'Jun', 'Jul', 'Aug',",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " 'Sep', 'Oct', 'Nov', 'Dec'. The data",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " types of these columns are int64 for 'Year",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "' and float64 for the rest.\n\nIt appears that this CSV file",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " contains monthly inflation rates for different years. The 'Year' column represents",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " the year, and the rest of the columns represent the inflation rates",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " for each month of the",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " year.",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 327
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 125
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 452
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "started"
+ },
+ "tool_call": "",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "import pandas as pd\n# Load data\ndf = pd.read",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "_csv(\"/var/folders/cz/vyh7",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "y1d11xg881lsxsshnc5c000",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": "0gn/T/tmpflpgiagc/2VkeqrPlinflation",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": ".csv\")\n# Rows\nprint(\"Number of rows and columns in",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": ":\", len(df.columns))\n# Column names\nprint(\"Columns of the data",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "in_progress"
+ },
+ "tool_call": " the columns are:\", df.dtypes)",
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpflpgiagc/2VkeqrPlinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)"
+ },
+ "call_id": "b8aab119-7997-428e-81ab-e6aa163f7acc",
+ "tool_name": {
+ "__enum__": "BuiltinTool",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "code_interpreter"
+ }
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 36
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 10
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 46
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:02bc2\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:e40e6\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:200a9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:e40e6\\nContent: .. 
_lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:e40e6\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. 
code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:e40e6\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. 
_lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:7bf28\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:b299f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:af719\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b299f\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:b299f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:b299f\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:7bf28\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. 
For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. 
_glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:b299f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:af719\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. 
-----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "{\"",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "type\": \"function\", \"name\":",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " \"knowledge_search\", \"parameters",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "\": {\"query\": \"How to use Lo",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "RA in Torchtune\"}}",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "parse_status": {
+ "__enum__": "ToolCallParseStatus",
+ "__module__": "llama_stack.apis.common.content_types",
+ "value": "succeeded"
+ },
+ "tool_call": {
+ "arguments": {
+ "query": "How to use LoRA in Torchtune"
+ },
+ "call_id": "3d9a3bd1-4a05-4feb-b5a2-eed7a7a24f1b",
+ "tool_name": "knowledge_search"
+ },
+ "type": "tool_call"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 117
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 40
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 157
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
+ "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:7bf28\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:b299f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:af719\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": {
+ "chunks": [
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "start"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "I",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "'m ready to help you answer questions about",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": " Torchtune based on the documentation you provided",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": ". What's your first question?",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "progress"
+ },
+ "logprobs": null,
+ "stop_reason": null
+ },
+ "metrics": null
+ }
+ },
+ {
+ "__module__": "llama_stack.apis.inference.inference",
+ "__pydantic__": "ChatCompletionResponseStreamChunk",
+ "data": {
+ "event": {
+ "delta": {
+ "text": "",
+ "type": "text"
+ },
+ "event_type": {
+ "__enum__": "ChatCompletionResponseEventType",
+ "__module__": "llama_stack.apis.inference.inference",
+ "value": "complete"
+ },
+ "logprobs": null,
+ "stop_reason": {
+ "__enum__": "StopReason",
+ "__module__": "llama_stack.models.llama.datatypes",
+ "value": "end_of_turn"
+ }
+ },
+ "metrics": [
+ {
+ "metric": "prompt_tokens",
+ "unit": null,
+ "value": 75
+ },
+ {
+ "metric": "completion_tokens",
+ "unit": null,
+ "value": 35
+ },
+ {
+ "metric": "total_tokens",
+ "unit": null,
+ "value": 110
+ }
+ ]
+ }
+ }
+ ],
+ "type": "generator"
+ },
"[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:8c1f5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:f9c19\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:13786\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:bbddb\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:15b86\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:83901\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:15b86\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:15b86\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:15b86\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:bbddb\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset