From 4948cda0da927702f41d7a6fe99df47e515872ea Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 12 Mar 2025 16:59:02 -0700 Subject: [PATCH] regen --- .../recorded_responses/chat_completion.json | 26969 +++++++++++++++- .../recorded_responses/invoke_tool.json | 81 +- 2 files changed, 25663 insertions(+), 1387 deletions(-) diff --git a/tests/integration/fixtures/recorded_responses/chat_completion.json b/tests/integration/fixtures/recorded_responses/chat_completion.json index 7234b6c31..71fbc9361 100644 --- a/tests/integration/fixtures/recorded_responses/chat_completion.json +++ b/tests/integration/fixtures/recorded_responses/chat_completion.json @@ -26786,7 +26786,7 @@ "data": { "event": { "delta": { - "text": " provided function definitions", + "text": " provided function definitions are", "type": "text" }, "event_type": { @@ -26806,7 +26806,7 @@ "data": { "event": { "delta": { - "text": " are not suitable", + "text": " not suitable", "type": "text" }, "event_type": { @@ -26826,7 +26826,7 @@ "data": { "event": { "delta": { - "text": " for this task. Please re", + "text": " for this", "type": "text" }, "event_type": { @@ -26846,7 +26846,7 @@ "data": { "event": { "delta": { - "text": "work them to", + "text": " task", "type": "text" }, "event_type": { @@ -26866,7 +26866,107 @@ "data": { "event": { "delta": { - "text": " align with the task requirements.", + "text": ". Please", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " rework", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " them to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " align with", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the task", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " requirements.", "type": "text" }, "event_type": { @@ -26903,54 +27003,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "D2n_IS_8", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:32.021393+00:00", - "__module__": "datetime" - }, - "trace_id": "amAiZv5PQKSsA74j", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 90 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "D2n_IS_8", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:32.021420+00:00", - "__module__": "datetime" - }, - "trace_id": "amAiZv5PQKSsA74j", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 32 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "D2n_IS_8", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:32.021427+00:00", - "__module__": "datetime" - }, - "trace_id": "amAiZv5PQKSsA74j", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 122 } ] @@ -27795,7 +27859,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", + "text": "get_bo", "type": "text" }, "event_type": { @@ -27815,7 +27879,147 @@ "data": { "event": { "delta": { - "text": "', celcius=True)]", + "text": "iling_point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(liquid", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_name='", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice',", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " celci", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "us=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -27845,7 +28049,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "3955f756-9aa0-433f-be8f-af8941c220de", + "call_id": "a1515d69-2f3f-43a3-9d1d-7b78b2c9fb5a", "tool_name": "get_boiling_point" }, "type": "tool_call" @@ -27888,54 +28092,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "QZ6PSGpT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:29.629456+00:00", - "__module__": "datetime" - }, - "trace_id": "M72bosg8TBe3uhx3", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 43 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "QZ6PSGpT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:29.629488+00:00", - "__module__": "datetime" - }, - "trace_id": "M72bosg8TBe3uhx3", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "QZ6PSGpT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:03:29.629494+00:00", - "__module__": "datetime" - }, - "trace_id": "M72bosg8TBe3uhx3", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 71 } ] @@ -27992,7 +28160,7 @@ "data": { "event": { "delta": { - "text": " function call returned an", + "text": " function get", "type": "text" }, "event_type": { @@ -28012,7 +28180,7 @@ "data": { "event": { "delta": { - "text": " error since", + "text": "_boiling", "type": "text" }, "event_type": { @@ -28032,7 +28200,7 @@ "data": { "event": { "delta": { - "text": " \"", + "text": "_point is", "type": "text" }, "event_type": { @@ -28052,7 +28220,7 @@ "data": { "event": { "delta": { - "text": "polyjuice\" is", + "text": " not able", "type": "text" }, "event_type": { @@ -28072,7 +28240,7 @@ "data": { "event": { "delta": { - "text": " not a real liquid. Polyju", + "text": " to determine", "type": "text" }, "event_type": { @@ -28092,7 +28260,7 @@ "data": { "event": { "delta": { - "text": "ice is a fictional substance from the", + "text": " the boiling", "type": "text" }, "event_type": { @@ -28112,7 +28280,7 @@ "data": { "event": { "delta": { - "text": " Harry Potter series. The boiling point", + "text": " point of", "type": "text" }, "event_type": { @@ -28132,7 +28300,7 @@ "data": { "event": { "delta": { - "text": " of a substance is a physical", + "text": " \"polyju", "type": "text" }, "event_type": { @@ -28152,7 +28320,7 @@ "data": { "event": { "delta": { - "text": " property that can be measured and", + "text": "ice\"", "type": "text" }, "event_type": { @@ -28172,7 +28340,7 @@ "data": { "event": { "delta": { - "text": " quantified", + "text": " as it", "type": "text" }, "event_type": { @@ -28192,7 +28360,7 @@ "data": { "event": { "delta": { - "text": ", but it only applies", + "text": " is not", "type": "text" }, "event_type": { @@ -28212,7 +28380,187 @@ "data": { "event": { "delta": { - "text": " to real substances that exist in the physical world.", + "text": " a real", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " liquid.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " fictional substance", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " from the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Harry Potter", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " series", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", "type": "text" }, "event_type": { @@ -28249,55 +28597,19 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "y9SHtJTQ", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:01.411612+00:00", - "__module__": "datetime" - }, - "trace_id": "_I2Cu85IRtOSBSX9", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 84 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "y9SHtJTQ", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:01.411644+00:00", - "__module__": "datetime" - }, - "trace_id": "_I2Cu85IRtOSBSX9", - "type": "metric", - "unit": "tokens", - "value": 73 + "unit": null, + "value": 51 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "y9SHtJTQ", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:01.411650+00:00", - "__module__": "datetime" - }, - "trace_id": "_I2Cu85IRtOSBSX9", - "type": "metric", - "unit": "tokens", - "value": 157 + "unit": null, + "value": 135 } ] } @@ -28514,7 +28826,7 @@ "data": { "event": { "delta": { - "text": " function get_bo", + "text": " function call", "type": "text" }, "event_type": { @@ -28534,7 +28846,7 @@ "data": { "event": { "delta": { - "text": "iling_point_with_metadata does not exist,", + "text": " should be", "type": "text" }, "event_type": { @@ -28554,7 +28866,7 @@ "data": { "event": { "delta": { - "text": " I will", + "text": " [get", "type": "text" }, "event_type": { @@ -28574,7 +28886,7 @@ "data": { "event": { "delta": { - "text": " assume you", + "text": "_boiling", "type": "text" }, "event_type": { @@ -28594,7 +28906,7 @@ "data": { "event": { "delta": { - "text": " meant get_bo", + "text": "_point_with", "type": "text" }, "event_type": { @@ -28614,7 +28926,7 @@ "data": { "event": { "delta": { - "text": "iling_point_with_metadata", + "text": "_metadata(", "type": "text" }, "event_type": { @@ -28634,7 +28946,7 @@ "data": { "event": { "delta": { - "text": ". The boiling point of polyjuice", + "text": "liquid_name", "type": "text" }, "event_type": { @@ -28654,7 +28966,87 @@ "data": { "event": { "delta": { - "text": " is -100.", + "text": "=\"poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\", cel", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "cius", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "=True)]", "type": "text" }, "event_type": { @@ -28691,55 +29083,19 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "8dM6i5mO", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:03.329281+00:00", - "__module__": "datetime" - }, - "trace_id": "zMJDP5dXRrChi7uE", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 86 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "8dM6i5mO", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:03.329312+00:00", - "__module__": "datetime" - }, - "trace_id": "zMJDP5dXRrChi7uE", - "type": "metric", - "unit": "tokens", - "value": 45 + "unit": null, + "value": 35 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "8dM6i5mO", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:03.329318+00:00", - "__module__": "datetime" - }, - "trace_id": "zMJDP5dXRrChi7uE", - "type": "metric", - "unit": "tokens", - "value": 131 + "unit": null, + "value": 121 } ] } @@ -28976,7 +29332,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", + "text": "get_bo", "type": "text" }, "event_type": { @@ -28996,7 +29352,147 @@ "data": { "event": { "delta": { - "text": "', celcius=True)]", + "text": "iling_point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(liquid", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_name='", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice',", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " celci", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "us=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -29026,7 +29522,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "328cb19d-47bb-47cc-8258-a5ca2e26803e", + "call_id": "eafd0169-6523-4625-8d5f-25e902b70abb", "tool_name": "get_boiling_point" }, "type": "tool_call" @@ -29069,54 +29565,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "dS0bhfN_", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:53.324788+00:00", - "__module__": "datetime" - }, - "trace_id": "UJz5Cas1SDyQYeBk", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 37 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "dS0bhfN_", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:53.324835+00:00", - "__module__": "datetime" - }, - "trace_id": "UJz5Cas1SDyQYeBk", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "dS0bhfN_", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:53.324844+00:00", - "__module__": "datetime" - }, - "trace_id": "UJz5Cas1SDyQYeBk", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 65 } ] @@ -29173,7 +29633,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point_with_metadata", + "text": "get_bo", "type": "text" }, "event_type": { @@ -29193,7 +29653,7 @@ "data": { "event": { "delta": { - "text": "(liquid_name='polyjuice', cel", + "text": "iling_point", "type": "text" }, "event_type": { @@ -29213,7 +29673,147 @@ "data": { "event": { "delta": { - "text": "cius=True)]", + "text": "_with_metadata", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(liquid", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_name='", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice',", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " celci", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "us=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -29243,7 +29843,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "5bb48d00-7d5c-49e2-bddf-e5fdc5f35485", + "call_id": "71898ef2-4a6d-4131-ac62-281c8fb5d29c", "tool_name": "get_boiling_point_with_metadata" }, "type": "tool_call" @@ -29286,54 +29886,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "mfrFN7m2", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:02.136501+00:00", - "__module__": "datetime" - }, - "trace_id": "T4eddr4-SMWPQwKA", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 37 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "mfrFN7m2", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:02.136529+00:00", - "__module__": "datetime" - }, - "trace_id": "T4eddr4-SMWPQwKA", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "mfrFN7m2", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:05:02.136535+00:00", - "__module__": "datetime" - }, - "trace_id": "T4eddr4-SMWPQwKA", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 67 } ] @@ -29390,7 +29954,7 @@ "data": { "event": { "delta": { - "text": " I answered the", + "text": " I answered", "type": "text" }, "event_type": { @@ -29410,7 +29974,7 @@ "data": { "event": { "delta": { - "text": " phone, the friendly", + "text": " the phone", "type": "text" }, "event_type": { @@ -29430,7 +29994,7 @@ "data": { "event": { "delta": { - "text": " voice on the other end said \"hello\"", + "text": ",", "type": "text" }, "event_type": { @@ -29450,7 +30014,187 @@ "data": { "event": { "delta": { - "text": " and asked how I was doing.", + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " friendly voice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " on the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " other", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " end", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " said \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "hello\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " asked", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " how I was doing.", "type": "text" }, "event_type": { @@ -29487,54 +30231,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "tJEuRhla", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:01.044284+00:00", - "__module__": "datetime" - }, - "trace_id": "bnDS7Z41TRO0UyfH", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "tJEuRhla", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:01.044312+00:00", - "__module__": "datetime" - }, - "trace_id": "bnDS7Z41TRO0UyfH", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 34 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "tJEuRhla", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:01.044318+00:00", - "__module__": "datetime" - }, - "trace_id": "bnDS7Z41TRO0UyfH", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 64 } ] @@ -29792,13 +30500,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" + "text": "The", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29817,13 +30520,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\n# Load data\ndf =", - "type": "tool_call" + "text": " error message", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29842,13 +30540,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": " pd.read_csv(\"/var/folders/rb/qv8vwgyj", - "type": "tool_call" + "text": " indicates that", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29867,13 +30560,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": "6yjd3t4pwsy9t0rm0000", - "type": "tool_call" + "text": " the file", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29892,13 +30580,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": "gn/T/tmp2x_sml66/ZEjbinQHin", - "type": "tool_call" + "text": " 'b", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29917,13 +30600,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": "flation.csv\")\n# Rows\nprint(\"Number of rows and columns in the", - "type": "tool_call" + "text": "wrap'", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29942,13 +30620,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": " data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\",", - "type": "tool_call" + "text": " was", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29967,13 +30640,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": " len(df.columns))\n# Column names\nprint(\"Columns of the data", - "type": "tool_call" + "text": " not found", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -29992,13 +30660,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": " are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df", - "type": "tool_call" + "text": ". This", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -30017,13 +30680,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": ".dtypes)\n# Sample of data\nprint(\"Data sample from file:\")\n", - "type": "tool_call" + "text": " is likely", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -30042,13 +30700,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "in_progress" - }, - "tool_call": "print(df.head())", - "type": "tool_call" + "text": " because the", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -30067,23 +30720,8 @@ "data": { "event": { "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "__module__": "llama_stack.apis.common.content_types", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp2x_sml66/ZEjbinQHinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)\n# Sample of data\nprint(\"Data sample from file:\")\nprint(df.head())" - }, - "call_id": "1df8b196-9eff-4b06-97e7-ab175c741e8f", - "tool_name": { - "__enum__": "BuiltinTool", - "__module__": "llama_stack.models.llama.datatypes", - "value": "code_interpreter" - } - }, - "type": "tool_call" + "text": " file", + "type": "text" }, "event_type": { "__enum__": "ChatCompletionResponseEventType", @@ -30091,11 +30729,1767 @@ "value": "progress" }, "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "__module__": "llama_stack.models.llama.datatypes", - "value": "end_of_turn" - } + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path provided", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is incorrect", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " does", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not exist", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " in the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " current working", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " directory.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "To resolve", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " this issue", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " can", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " try", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the following", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ":\n\n1", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Ensure that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path provided", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file exists", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " in the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " specified", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " location", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n2", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Use", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file path", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ": If", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is located", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " in a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " different", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " directory", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", provide", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check the file name:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Ensure that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " name", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correct and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " matches the one", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " provided in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " code.\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "4.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Use the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " absolute file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Instead of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " using a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " relative file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " try using", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the absolute", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file path", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "If you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " are still", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " encountering issues", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ",", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " please", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " provide more", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " details about", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and its", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " location,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'ll be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " happy to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " assist you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " further", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, "metrics": null } @@ -30123,55 +32517,19 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "fLqIbpek", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:40.262304+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", - "value": 235 + "unit": null, + "value": 236 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "fLqIbpek", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:40.262340+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", - "value": 10 + "unit": null, + "value": 183 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "fLqIbpek", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:40.262347+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", - "value": 245 + "unit": null, + "value": 419 } ] } @@ -30237,7 +32595,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "import pandas as pd\n# Load data\ndf = pd", + "tool_call": "import pandas", "type": "tool_call" }, "event_type": { @@ -30262,7 +32620,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": ".read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4", + "tool_call": " as pd", "type": "tool_call" }, "event_type": { @@ -30287,7 +32645,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "pwsy9t0rm0000gn/T/tmp2x_sml66/ZEj", + "tool_call": "\n#", "type": "tool_call" }, "event_type": { @@ -30312,7 +32670,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "binQHinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data", + "tool_call": " Load data", "type": "tool_call" }, "event_type": { @@ -30337,7 +32695,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": ":\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n#", + "tool_call": "\ndf", "type": "tool_call" }, "event_type": { @@ -30362,7 +32720,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint", + "tool_call": " = pd", "type": "tool_call" }, "event_type": { @@ -30387,7 +32745,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "(\"Datatype of the columns are:\", df.dtypes)\n# Sample", + "tool_call": ".read_csv", "type": "tool_call" }, "event_type": { @@ -30412,7 +32770,1682 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " of data\nprint(\"Data sample from file:\")\nprint(df.head())", + "tool_call": "(\"/var", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/f", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "olders/r", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "b/q", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "v", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "qy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "vgy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "j", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "6", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "yjd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "3t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "4p", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "ws", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "y", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "9t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0rm", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0000", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gn/T", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/tmp", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "xgx", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "j", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "70", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "y_/", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "4NO", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0CF", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "URin", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "flation.csv", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\")\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Rows\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print(\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Number of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " rows and", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " columns in", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\", df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".shape)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(\"Columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data are", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\", len", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df.columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "))\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Column names", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(\"Columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data are", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\", df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".columns)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Column", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dtypes", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(\"Dat", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "atype of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " are:\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " df.d", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "types)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Sample", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(\"Data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " sample from", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " file:\")\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print(df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".head())", "type": "tool_call" }, "event_type": { @@ -30439,9 +34472,9 @@ }, "tool_call": { "arguments": { - "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp2x_sml66/ZEjbinQHinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)\n# Sample of data\nprint(\"Data sample from file:\")\nprint(df.head())" + "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/rb/qvqyvgyj6yjd3t4pwsy9t0rm0000gn/T/tmpxgxj70y_/4NO0CFURinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)\n# Sample of data\nprint(\"Data sample from file:\")\nprint(df.head())" }, - "call_id": "c1708ded-f272-4008-b91f-19d61780c394", + "call_id": "923b4193-8e2e-4b28-b55d-3d0e6e9a3b90", "tool_name": { "__enum__": "BuiltinTool", "__module__": "llama_stack.models.llama.datatypes", @@ -30488,54 +34521,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "KTMayjIE", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:37.305765+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 37 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "KTMayjIE", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:37.305820+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 10 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "KTMayjIE", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:37.305832+00:00", - "__module__": "datetime" - }, - "trace_id": "StUjhrTMQKKQSRvS", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 47 } ] @@ -31505,6 +35502,5140 @@ ], "type": "generator" }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the shape of the dataframe (number of rows and columns)\\nprint(df.shape)\\n\\n# Print the column names\\nprint(df.columns)\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print a summary of the dataframe (count, mean, std, min, 25%, 50%, 75%, max)\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"The error message indicates that the file \\\"\" does not exist. This could be due to a number of reasons, such as the file being deleted or moved, or the path being incorrect.\\n\\nTo resolve this issue, you can try the following:\\n\\n1. Check the file path: Make sure the file path is correct and the file exists at that location.\\n2. Check the file name: Make sure the file name is correct and it matches the one you are trying to load.\\n3. Check the file format: Make sure the file is in the correct format (CSV) and it is not corrupted.\\n4. Try a different file: If the file is not available, try loading a different file to see if the issue is specific to this file or not.\\n\\nIf you are still having trouble, please provide more information about the file and the error message you are receiving, and I will do my best to assist you.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Convert the 'Year' column to datetime\\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\\n\\n# Group by 'Year' and calculate the average inflation\\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\\n\\n# Plot the average inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Inflation')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " error message", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " indicates that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \"/var", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/folders", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/rb", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/qv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "8vw", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "gyj", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "6y", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "jd3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "t4", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "p", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "wsy", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "9t", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "0rm", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "0000", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "gn/T", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/tmpx", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "gxj", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "70y", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_/Z", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "qZ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "39W", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "iyin", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "flation.csv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\" does", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not exist", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " could be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " due to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " number of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " reasons,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " such as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " being deleted", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or moved", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", or", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path being", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " incorrect.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "To resolve", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " this issue", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " can try", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the following", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ":\n\n1", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Make sure", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correct and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " exists at", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " that location", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "2", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " name:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Make sure", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " name is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correct and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " it matches", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " one you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " are trying", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " load", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file format", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ": Make", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " sure", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " format (", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "CSV)", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and it", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " corrupted.\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "4.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Try a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " different file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ": If", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " available,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " try loading", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a different", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " see if", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " issue is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " specific to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " this", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file or", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "If you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " are still", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " having trouble", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ",", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " please provide", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " more information", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " about the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the error", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " message you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " are receiving", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " I will", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " do my", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " best to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " assist you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 732 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 239 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 971 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the shape of the dataframe (number of rows and columns)\\nprint(df.shape)\\n\\n# Print the column names\\nprint(df.columns)\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print a summary of the dataframe (count, mean, std, min, 25%, 50%, 75%, max)\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"The error message indicates that the file \\\"\" does not exist. This could be due to a number of reasons, such as the file being deleted or moved, or the path being incorrect.\\n\\nTo resolve this issue, you can try the following:\\n\\n1. Check the file path: Make sure the file path is correct and the file exists at that location.\\n2. Check the file name: Make sure the file name is correct and it matches the one you are trying to load.\\n3. Check the file format: Make sure the file is in the correct format (CSV) and it is not corrupted.\\n4. Try a different file: If the file is not available, try loading a different file to see if the issue is specific to this file or not.\\n\\nIf you are still having trouble, please provide more information about the file and the error message you are receiving, and I will do my best to assist you.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " as pd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import matplotlib", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".pyplot as", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " plt\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Load", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the CSV", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " file\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "df =", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " pd.read", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_csv(\"/", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "var/f", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "olders/r", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "b/q", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "v8", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "vw", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "j6", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "yjd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "3t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "4p", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "ws", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "y", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "9t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0rm", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0000", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gn", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/T", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/tmpx", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gxj", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "70y", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_/Z", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "qZ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "39W", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "iyin", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "flation.csv", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\")\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Convert", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the '", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year'", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " column to", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " datetime\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "df['", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year']", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " = pd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".to_datetime", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df['", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "'],", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " format='%", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Y')\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Group", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " by '", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "'", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " and calculate", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the average", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " inflation\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "df_avg", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " = df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".groupby('", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year')['", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "'].mean", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "().", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "reset", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_index()\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Plot", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the average", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " as", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " a time", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " series\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "plt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".figure(figsize", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "=(10", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ",6", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "))\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".plot(df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_avg_in", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "flation['", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year'],", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " df_avg", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "['", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "In", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "flation'], marker='o", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "')\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".title", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "('Average", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Yearly", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "')\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".xlabel('", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Year')\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "plt.ylabel('Inflation')\nplt.grid(True)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "plt.show", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "()", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmpxgxj70y_/ZqZ39Wiyinflation.csv\")\n\n# Convert the 'Year' column to datetime\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n# Group by 'Year' and calculate the average inflation\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt.grid(True)\nplt.show()" + }, + "call_id": "48277e37-1992-4510-9751-9895707cb190", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 484 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 10 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 494 + } + ] + } + } + ], + "type": "generator" + }, "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics about the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { "chunks": [ { @@ -32066,6 +41197,2511 @@ ], "type": "generator" }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the shape of the dataframe (number of rows and columns)\\nprint(df.shape)\\n\\n# Print the column names\\nprint(df.columns)\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print a summary of the dataframe (count, mean, std, min, 25%, 50%, 75%, max)\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " error message", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " indicates that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \"/", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "var/f", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "olders/r", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "b/q", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "v8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "vwgy", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "j6", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "yjd", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "3t", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "4p", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "wsy", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "9", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "t0", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "rm000", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "0gn", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/T/tmp", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "xgx", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "j70", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "y_/", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Zq", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Z39", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Wiy", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "inflation", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".csv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\" does", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not exist", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " could be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " due to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " number of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " reasons,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " such as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " being deleted", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " moved,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path being", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " incorrect.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "To resolve", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " this issue", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " can try", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the following", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ":\n\n1", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Check the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file path", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ": Make", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " sure the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file path", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file exists", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " at that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " location", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "2.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Check the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file name", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ": Make", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " sure the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file name", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is correct", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and it", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " matches the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " one you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " are trying", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to load", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". Check", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " format:", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Make sure", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the correct format (CSV)", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and it is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " corrupted.\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "4", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Try a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " different file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " If", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " available,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " try loading", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a different", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " see if", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " issue is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " specific to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " this file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".\n\nIf", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " you are", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " still having", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " trouble,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " please provide", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " more information", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " about the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the error", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " message", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " you are", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " receiving,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " will", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " do my", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " best to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " assist you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 243 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 239 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 482 + } + ] + } + } + ], + "type": "generator" + }, "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { "chunks": [ { @@ -32124,7 +43760,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "import pandas as pd\n\n# Load the CSV file\ndf = pd.read", + "tool_call": "import pandas", "type": "tool_call" }, "event_type": { @@ -32149,7 +43785,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "_csv(\"/var/folders/rb/qv8vwgyj6y", + "tool_call": " as pd", "type": "tool_call" }, "event_type": { @@ -32174,7 +43810,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "jd3t4pwsy9t0rm0000gn/T", + "tool_call": "\n\n#", "type": "tool_call" }, "event_type": { @@ -32199,7 +43835,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "/tmp2x_sml66/9vYvmVRoinflation.csv", + "tool_call": " Load the", "type": "tool_call" }, "event_type": { @@ -32224,7 +43860,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n", + "tool_call": " CSV file", "type": "tool_call" }, "event_type": { @@ -32249,7 +43885,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "# Print information about the dataframe\nprint(df", + "tool_call": "\n", "type": "tool_call" }, "event_type": { @@ -32274,7 +43910,1757 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": ".info())\n\n# Print summary statistics about the dataframe\nprint(df.describe", + "tool_call": "df =", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " pd.read", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_csv(\"/", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "var/f", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "olders/r", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "b/q", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "v8", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "vwgy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "j6", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "yjd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "3", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "4p", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "wsy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "9t", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0rm", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "000", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gn/T", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/tmpx", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gxj70y_/ZqZ39", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "Wiy", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".csv\")\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Print", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the first", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " few rows", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dataframe\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print(df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".head())\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Print", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the shape", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dataframe (", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "number", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of rows", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " and columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df.shape", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Print the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " column names", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df.columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Print the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data types", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of each", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " column\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print(df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".dtypes", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Print a", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " summary of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dataframe (", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "count,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " mean,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " std", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " min,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 25", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "%, ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "50%,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 75", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "%, max", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df.describe", "type": "tool_call" }, "event_type": { @@ -32326,9 +45712,9 @@ }, "tool_call": { "arguments": { - "code": "import pandas as pd\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp2x_sml66/9vYvmVRoinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print information about the dataframe\nprint(df.info())\n\n# Print summary statistics about the dataframe\nprint(df.describe())" + "code": "import pandas as pd\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmpxgxj70y_/ZqZ39Wiyinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the shape of the dataframe (number of rows and columns)\nprint(df.shape)\n\n# Print the column names\nprint(df.columns)\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print a summary of the dataframe (count, mean, std, min, 25%, 50%, 75%, max)\nprint(df.describe())" }, - "call_id": "5bbfebeb-4360-4ef9-a9e2-4227a8e8c699", + "call_id": "c3783091-f1cf-49e0-bc3b-a827618dcbe0", "tool_name": { "__enum__": "BuiltinTool", "__module__": "llama_stack.models.llama.datatypes", @@ -32375,54 +45761,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "AyEX3So6", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:17.873486+00:00", - "__module__": "datetime" - }, - "trace_id": "kNsljyzfQV2Cn4aZ", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 36 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "AyEX3So6", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:17.873500+00:00", - "__module__": "datetime" - }, - "trace_id": "kNsljyzfQV2Cn4aZ", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 10 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "AyEX3So6", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:17.873503+00:00", - "__module__": "datetime" - }, - "trace_id": "kNsljyzfQV2Cn4aZ", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 46 } ] @@ -34367,6 +47717,6746 @@ ], "type": "generator" }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:89553\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:700ad\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:122a9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help. What's your question about Torchtune?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"using LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:700ad\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:700ad\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:700ad\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:89553\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:700ad\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:122a9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help. What's your question about Torchtune?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "[k", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "nowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "using Lo", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "RA in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une\")]", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "using LoRA in Torchtune" + }, + "call_id": "0d7b903c-c045-4c45-88d8-1200d809b37a", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 107 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 23 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 130 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:89553\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:700ad\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:122a9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to help", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". What", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'s your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " question", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " about", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 75 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 25 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 100 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b065e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:fe9fc\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:c96ea\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help. What's your question about Torchtune?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"using LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:fe9fc\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:fe9fc\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:fe9fc\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b065e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:fe9fc\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:c96ea\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help. What's your question about Torchtune?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "[k", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "nowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "using Lo", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "RA in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\")]", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "using LoRA in Torchtune" + }, + "call_id": "2bfd3d0d-5172-4031-af00-fd6c456e4fd9", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 107 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 23 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 130 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b065e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:fe9fc\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:c96ea\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help. What", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'s your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " question about", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Tor", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "chtune", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 75 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 25 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 100 + } + ] + } + } + ], + "type": "generator" + }, "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { "chunks": [ { @@ -34415,7 +54505,7 @@ "data": { "event": { "delta": { - "text": "nowledge_search(query=\"Tor", + "text": "nowledge_search", "type": "text" }, "event_type": { @@ -34435,7 +54525,67 @@ "data": { "event": { "delta": { - "text": "chtune documentation\")]", + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une documentation", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\")]", "type": "text" }, "event_type": { @@ -34464,7 +54614,7 @@ "arguments": { "query": "Torchtune documentation" }, - "call_id": "385cbde8-19e8-4c8b-84ca-b75050b3666b", + "call_id": "70cd0350-8689-4bb5-a0bf-2a9d2112d08d", "tool_name": "knowledge_search" }, "type": "tool_call" @@ -34507,54 +54657,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "-7YS2sLl", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:30.668846+00:00", - "__module__": "datetime" - }, - "trace_id": "BLgI_VzNTCCRs_2T", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 39 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "-7YS2sLl", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:30.668859+00:00", - "__module__": "datetime" - }, - "trace_id": "BLgI_VzNTCCRs_2T", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 20 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "-7YS2sLl", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:30.668861+00:00", - "__module__": "datetime" - }, - "trace_id": "BLgI_VzNTCCRs_2T", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 59 } ] @@ -34611,7 +54725,7 @@ "data": { "event": { "delta": { - "text": "lama3-8B uses grouped-query", + "text": "lama3", "type": "text" }, "event_type": { @@ -34631,7 +54745,7 @@ "data": { "event": { "delta": { - "text": " attention instead of", + "text": "-", "type": "text" }, "event_type": { @@ -34651,7 +54765,147 @@ "data": { "event": { "delta": { - "text": " the standard multi-head attention.", + "text": "8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "B uses", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " grouped-query", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention instead", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " standard", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " multi-head", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention.", "type": "text" }, "event_type": { @@ -34688,54 +54942,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "1eIEdjPP", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:18.982970+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 80 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "1eIEdjPP", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:18.983000+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "1eIEdjPP", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:18.983005+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 108 } ] @@ -34792,7 +55010,107 @@ "data": { "event": { "delta": { - "text": "lama3-8B uses grouped-query attention instead of", + "text": "lama3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "-8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "B uses", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " grouped", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "-query attention", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " instead of", "type": "text" }, "event_type": { @@ -34832,7 +55150,47 @@ "data": { "event": { "delta": { - "text": " multi-head attention.", + "text": " multi", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "-head attention", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", "type": "text" }, "event_type": { @@ -34869,54 +55227,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "SlTnlfYc", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.884663+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 80 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "SlTnlfYc", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.884753+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "SlTnlfYc", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.884760+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 108 } ] @@ -34973,7 +55295,7 @@ "data": { "event": { "delta": { - "text": "nowledge_search(query=\"Llama3-8", + "text": "nowledge_search", "type": "text" }, "event_type": { @@ -34993,7 +55315,107 @@ "data": { "event": { "delta": { - "text": "B attention type\")]", + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Llama", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "3-", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "8B", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention type", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\")]", "type": "text" }, "event_type": { @@ -35022,7 +55444,7 @@ "arguments": { "query": "Llama3-8B attention type" }, - "call_id": "4901bbdf-8faf-4a57-b6f6-01688c6290e6", + "call_id": "d7f3056e-3c5c-4bbc-869e-b617a35bdbb4", "tool_name": "knowledge_search" }, "type": "tool_call" @@ -35065,54 +55487,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "DBPomV08", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:15.412559+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 40 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "DBPomV08", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:15.412607+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 24 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "DBPomV08", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:15.412615+00:00", - "__module__": "datetime" - }, - "trace_id": "rNeuYcnxTSqrP6Dg", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 64 } ] @@ -35169,7 +55555,107 @@ "data": { "event": { "delta": { - "text": "nowledge_search(query=\"Llama3-8B attention", + "text": "nowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "L", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "lama3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "-8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "B attention", "type": "text" }, "event_type": { @@ -35218,7 +55704,7 @@ "arguments": { "query": "Llama3-8B attention type" }, - "call_id": "dd056386-b105-47e5-bd85-07e5ae096de1", + "call_id": "cd89b59f-7caa-4669-85c8-df6ba3892e77", "tool_name": "knowledge_search" }, "type": "tool_call" @@ -35261,54 +55747,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "yjKrmpeo", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.041566+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 40 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "yjKrmpeo", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.041591+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 24 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "yjKrmpeo", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:12.041597+00:00", - "__module__": "datetime" - }, - "trace_id": "liTx9auyTkyfvrBr", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 64 } ] @@ -35317,6 +55767,151 @@ ], "type": "generator" }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"current CEO of Meta\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"{\\\"query\\\": \\\"current CEO of Meta\\\", \\\"top_k\\\": [{\\\"title\\\": \\\"Executives - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\\\", \\\"score\\\": 0.8190992, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\\\", \\\"score\\\": 0.79080546, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Meet the Executive CSuite Team of Meta (Facebook) [2025]\\\", \\\"url\\\": \\\"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\\\", \\\"content\\\": \\\"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\\\", \\\"score\\\": 0.7602419, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Meta - Leadership & Governance\\\", \\\"url\\\": \\\"https://investor.atmeta.com/leadership-and-governance/\\\", \\\"content\\\": \\\"Mr. Andreessen was a co-founder of Netscape Communications Corporation, a software company, serving in various positions, including Chief Technology Officer and Executive Vice President of Products. Ms. Killefer also served as Assistant Secretary for Management, Chief Financial Officer, and Chief Operating Officer of the U.S. Department of the Treasury from 1997 to 2000 and as a member of the IRS Oversight Board from 2000 to 2005, including as Chair of the IRS Oversight Board from 2002 to 2004. Ms. Travis has served as Executive Vice President and Chief Financial Officer of The Estee Lauder Companies Inc., a global manufacturer and marketer of skin care, makeup, fragrance and hair care products, since August 2012.\\\", \\\"score\\\": 0.6175132, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg - Wikipedia\\\", \\\"url\\\": \\\"https://en.wikipedia.org/wiki/Mark_Zuckerberg\\\", \\\"content\\\": \\\"They began dating in 2003.[175] In September 2010, Chan, who was a medical student at the University of California, San Francisco at the time,[176] moved into his rented house in Palo Alto, California.[177][178] They married on May 19, 2012, in the grounds of his mansion in an event that also celebrated her graduation from medical school.[179][180] Zuckerberg revealed in July 2015 that they were expecting a baby girl and that Chan had previously experienced three miscarriages.[181] Their first daughter was born in December 2015.[182] They announced in a Chinese New Year video that their daughter's Chinese name is Chen Mingyu (Chinese: \\\\u9648\\\\u660e\\\\u5b87).[183] Their second daughter was born in August 2017.[184] Zuckerberg and his wife welcomed their third daughter in March 2023 and announced the news across his social media pages.[185] The couple also have a Puli dog named Beast,[186] who has over two million followers on Facebook.[187] Zuckerberg commissioned the visual artist Daniel Arsham to build a 7-foot-tall sculpture of his wife, which was unveiled in 2024.[188]\\\", \\\"score\\\": 0.05570498, \\\"raw_content\\\": null}]}\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " current CEO of Meta is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Mark", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Zuckerberg.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 1235 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 19 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 1254 + } + ] + } + } + ], + "type": "generator" + }, "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"current CEO of Meta\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"{\\\"query\\\": \\\"current CEO of Meta\\\", \\\"top_k\\\": [{\\\"title\\\": \\\"Meta - Leadership & Governance\\\", \\\"url\\\": \\\"https://investor.atmeta.com/leadership-and-governance/\\\", \\\"content\\\": \\\"Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. Mark is responsible for setting the overall direction and product strategy for the company. He leads the design of Meta's services and development of its core technology and infrastructure. Mark studied computer science at Harvard\\\", \\\"score\\\": 0.8342047, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Executives - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\\\", \\\"score\\\": 0.8190992, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\\\", \\\"score\\\": 0.79099923, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Meet the Executive CSuite Team of Meta (Facebook) [2025]\\\", \\\"url\\\": \\\"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\\\", \\\"content\\\": \\\"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\\\", \\\"score\\\": 0.7602419, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg - Wikipedia\\\", \\\"url\\\": \\\"https://en.wikipedia.org/wiki/Mark_Zuckerberg\\\", \\\"content\\\": \\\"They began dating in 2003.[175] In September 2010, Chan, who was a medical student at the University of California, San Francisco at the time,[176] moved into his rented house in Palo Alto, California.[177][178] They married on May 19, 2012, in the grounds of his mansion in an event that also celebrated her graduation from medical school.[179][180] Zuckerberg revealed in July 2015 that they were expecting a baby girl and that Chan had previously experienced three miscarriages.[181] Their first daughter was born in December 2015.[182] They announced in a Chinese New Year video that their daughter's Chinese name is Chen Mingyu (Chinese: \\\\u9648\\\\u660e\\\\u5b87).[183] Their second daughter was born in August 2017.[184] Zuckerberg and his wife welcomed their third daughter in March 2023 and announced the news across his social media pages.[185] The couple also have a Puli dog named Beast,[186] who has over two million followers on Facebook.[187] Zuckerberg commissioned the visual artist Daniel Arsham to build a 7-foot-tall sculpture of his wife, which was unveiled in 2024.[188]\\\", \\\"score\\\": 0.05564338, \\\"raw_content\\\": null}]}\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { "chunks": [ { @@ -35516,7 +56111,132 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "brave_search.call(query=\"current CEO of Meta\")", + "tool_call": "brave", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_search.call", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(query=\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "current", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " CEO of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Meta\")", "type": "tool_call" }, "event_type": { @@ -35545,7 +56265,7 @@ "arguments": { "query": "current CEO of Meta" }, - "call_id": "535c272b-768b-44fe-b303-2eae022f67f5", + "call_id": "112fe886-dc25-4347-9b54-b52571b0cdb5", "tool_name": { "__enum__": "BuiltinTool", "__module__": "llama_stack.models.llama.datatypes", @@ -35592,54 +56312,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "AZ60Ocso", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:03.907918+00:00", - "__module__": "datetime" - }, - "trace_id": "hwA8OLUhQ1qa3ecF", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 34 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "AZ60Ocso", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:03.907933+00:00", - "__module__": "datetime" - }, - "trace_id": "hwA8OLUhQ1qa3ecF", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 10 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "AZ60Ocso", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:03.907936+00:00", - "__module__": "datetime" - }, - "trace_id": "hwA8OLUhQ1qa3ecF", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 44 } ] @@ -35696,7 +56380,7 @@ "data": { "event": { "delta": { - "text": " boiling point of polyjuice is -100 degrees Celsius", + "text": " boiling point", "type": "text" }, "event_type": { @@ -35716,7 +56400,107 @@ "data": { "event": { "delta": { - "text": ".", + "text": " of poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is -", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "100 degrees", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Celsius.", "type": "text" }, "event_type": { @@ -35753,54 +56537,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "drZjZkfj", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:33.852666+00:00", - "__module__": "datetime" - }, - "trace_id": "Sn0I7GFHTxKxewK2", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 77 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "drZjZkfj", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:33.852692+00:00", - "__module__": "datetime" - }, - "trace_id": "Sn0I7GFHTxKxewK2", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 23 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "drZjZkfj", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:33.852699+00:00", - "__module__": "datetime" - }, - "trace_id": "Sn0I7GFHTxKxewK2", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 100 } ] @@ -35857,7 +56605,107 @@ "data": { "event": { "delta": { - "text": " boiling point of polyjuice is -100 degrees Celsius.", + "text": " boiling point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is -", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "100 degrees", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Celsius.", "type": "text" }, "event_type": { @@ -35894,54 +56742,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "WMEZtUXH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:32.617998+00:00", - "__module__": "datetime" - }, - "trace_id": "f9RM1qaUTk2LvaVo", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 77 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "WMEZtUXH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:32.618030+00:00", - "__module__": "datetime" - }, - "trace_id": "f9RM1qaUTk2LvaVo", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 23 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "WMEZtUXH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:32.618036+00:00", - "__module__": "datetime" - }, - "trace_id": "f9RM1qaUTk2LvaVo", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 100 } ] @@ -35998,7 +56810,7 @@ "data": { "event": { "delta": { - "text": " function get_boiling_point is not", + "text": " boiling point", "type": "text" }, "event_type": { @@ -36018,7 +56830,7 @@ "data": { "event": { "delta": { - "text": " able", + "text": " of poly", "type": "text" }, "event_type": { @@ -36038,7 +56850,7 @@ "data": { "event": { "delta": { - "text": " to find the", + "text": "juice", "type": "text" }, "event_type": { @@ -36058,7 +56870,7 @@ "data": { "event": { "delta": { - "text": " boiling point of \"polyjuice\" as", + "text": " is", "type": "text" }, "event_type": { @@ -36078,7 +56890,7 @@ "data": { "event": { "delta": { - "text": " it", + "text": " -", "type": "text" }, "event_type": { @@ -36098,7 +56910,7 @@ "data": { "event": { "delta": { - "text": " is not a real liquid", + "text": "100 degrees", "type": "text" }, "event_type": { @@ -36118,47 +56930,7 @@ "data": { "event": { "delta": { - "text": ". Polyju", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "__module__": "llama_stack.apis.inference.inference", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - } - }, - { - "__module__": "llama_stack.apis.inference.inference", - "__pydantic__": "ChatCompletionResponseStreamChunk", - "data": { - "event": { - "delta": { - "text": "ice is a fictional substance from the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "__module__": "llama_stack.apis.inference.inference", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - } - }, - { - "__module__": "llama_stack.apis.inference.inference", - "__pydantic__": "ChatCompletionResponseStreamChunk", - "data": { - "event": { - "delta": { - "text": " Harry Potter series.", + "text": " Celsius.", "type": "text" }, "event_type": { @@ -36195,55 +56967,19 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "p7Vx9VAq", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:28.232189+00:00", - "__module__": "datetime" - }, - "trace_id": "WKEqFugATCeCl8mc", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 77 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "p7Vx9VAq", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:28.232325+00:00", - "__module__": "datetime" - }, - "trace_id": "WKEqFugATCeCl8mc", - "type": "metric", - "unit": "tokens", - "value": 51 + "unit": null, + "value": 23 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "p7Vx9VAq", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:28.232334+00:00", - "__module__": "datetime" - }, - "trace_id": "WKEqFugATCeCl8mc", - "type": "metric", - "unit": "tokens", - "value": 128 + "unit": null, + "value": 100 } ] } @@ -37362,7 +58098,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", + "text": "get", "type": "text" }, "event_type": { @@ -37382,7 +58118,187 @@ "data": { "event": { "delta": { - "text": "', celcius=True)]", + "text": "_bo", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "iling_point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "liquid", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_name='", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice',", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " celci", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "us=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -37412,7 +58328,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "d43b2636-903d-430d-8389-91eefe5a1d75", + "call_id": "dcf85480-0aa4-4f86-9720-6c030aa67344", "tool_name": "get_boiling_point" }, "type": "tool_call" @@ -37455,54 +58371,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "9EBiVeAT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:32.221646+00:00", - "__module__": "datetime" - }, - "trace_id": "7kB12OwpSUOcwmJV", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "9EBiVeAT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:32.221673+00:00", - "__module__": "datetime" - }, - "trace_id": "7kB12OwpSUOcwmJV", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "9EBiVeAT", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:32.221680+00:00", - "__module__": "datetime" - }, - "trace_id": "7kB12OwpSUOcwmJV", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 58 } ] @@ -37559,7 +58439,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point(liquid_name", + "text": "get_bo", "type": "text" }, "event_type": { @@ -37579,7 +58459,167 @@ "data": { "event": { "delta": { - "text": "='polyjuice', celcius=True)]", + "text": "iling", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(liquid", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_name='", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice',", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " celci", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "us=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -37609,7 +58649,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "0548b2ef-daa4-4099-bb2c-b34f00752339", + "call_id": "a97762ff-007a-4706-b0b1-5ccfd0dbbc89", "tool_name": "get_boiling_point" }, "type": "tool_call" @@ -37652,54 +58692,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "lc3YWIQH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:31.366139+00:00", - "__module__": "datetime" - }, - "trace_id": "zDQV0rn3TNKfByA0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "lc3YWIQH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:31.366166+00:00", - "__module__": "datetime" - }, - "trace_id": "zDQV0rn3TNKfByA0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "lc3YWIQH", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:00:31.366172+00:00", - "__module__": "datetime" - }, - "trace_id": "zDQV0rn3TNKfByA0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 58 } ] @@ -37756,7 +58760,7 @@ "data": { "event": { "delta": { - "text": "juice is a fictional potion from", + "text": "juice", "type": "text" }, "event_type": { @@ -37776,7 +58780,7 @@ "data": { "event": { "delta": { - "text": " the Harry Potter series by J.K. Rowling. As it", + "text": " is", "type": "text" }, "event_type": { @@ -37796,7 +58800,7 @@ "data": { "event": { "delta": { - "text": "'s not a real substance, it doesn't have a boiling point", + "text": " a fictional", "type": "text" }, "event_type": { @@ -37816,7 +58820,7 @@ "data": { "event": { "delta": { - "text": ". Polyjuice Potion is a magical concoction", + "text": " potion from", "type": "text" }, "event_type": { @@ -37836,7 +58840,7 @@ "data": { "event": { "delta": { - "text": " that allows the drinker to assume the form and", + "text": " the", "type": "text" }, "event_type": { @@ -37856,7 +58860,7 @@ "data": { "event": { "delta": { - "text": " appearance", + "text": " Harry Potter", "type": "text" }, "event_type": { @@ -37876,7 +58880,7 @@ "data": { "event": { "delta": { - "text": " of another person, but it's not a physical substance that can", + "text": " series by", "type": "text" }, "event_type": { @@ -37896,7 +58900,7 @@ "data": { "event": { "delta": { - "text": " be measured or analyzed in the same way as real-world", + "text": " J", "type": "text" }, "event_type": { @@ -37916,7 +58920,7 @@ "data": { "event": { "delta": { - "text": " chemicals.\n\nIf you", + "text": ".K.", "type": "text" }, "event_type": { @@ -37936,7 +58940,7 @@ "data": { "event": { "delta": { - "text": " have any other questions or", + "text": " Rowling.", "type": "text" }, "event_type": { @@ -37956,7 +58960,927 @@ "data": { "event": { "delta": { - "text": " if there's anything else I can help you with, feel free to ask", + "text": " As it", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'s not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a real", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " substance,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " it doesn", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'t", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " have a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " point.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Potion", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " magical", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " concoction", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " that allows", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the drink", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "er to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " assume the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " form", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and appearance", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " another person", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", but", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " it's", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " physical substance", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " that", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " can be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " measured or", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " analyzed in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the same", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " way as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " real-world", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " chemicals.\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "If you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " have any", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " other questions", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " or if", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " there", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'s anything", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " else I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " can help", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " with,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " feel free", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to ask", "type": "text" }, "event_type": { @@ -38013,54 +59937,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "M0oC9v8Y", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:30.531648+00:00", - "__module__": "datetime" - }, - "trace_id": "0CMlh2kQShSVm3zE", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "M0oC9v8Y", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:30.531666+00:00", - "__module__": "datetime" - }, - "trace_id": "0CMlh2kQShSVm3zE", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 113 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "M0oC9v8Y", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:30.531671+00:00", - "__module__": "datetime" - }, - "trace_id": "0CMlh2kQShSVm3zE", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 143 } ] @@ -38117,7 +60005,7 @@ "data": { "event": { "delta": { - "text": "get_boiling_point(liquid_name='polyjuice', cel", + "text": "get_bo", "type": "text" }, "event_type": { @@ -38137,7 +60025,107 @@ "data": { "event": { "delta": { - "text": "cius=True)]", + "text": "iling_point(liquid_name='poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice', cel", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "cius", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "=True", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ")]", "type": "text" }, "event_type": { @@ -38167,7 +60155,7 @@ "celcius": true, "liquid_name": "polyjuice" }, - "call_id": "acbb04a1-08f4-4277-9b66-aadda2fa2be7", + "call_id": "81e4629c-9ed7-4fda-b0fd-8db41cd00407", "tool_name": "get_boiling_point" }, "type": "tool_call" @@ -38210,54 +60198,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "jMXDDKvp", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:26.175063+00:00", - "__module__": "datetime" - }, - "trace_id": "44TwzIrGS2aqfbVn", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 30 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "jMXDDKvp", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:26.175128+00:00", - "__module__": "datetime" - }, - "trace_id": "44TwzIrGS2aqfbVn", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 28 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "jMXDDKvp", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T02:04:26.175137+00:00", - "__module__": "datetime" - }, - "trace_id": "44TwzIrGS2aqfbVn", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 58 } ] @@ -38427,6 +60379,191 @@ ], "type": "generator" }, + "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return False\\n if n == 2:\\n return True\\n if n % 2 == 0:\\n return False\\n max_divisor = int(n**0.5) + 1\\n for d in range(3, max_divisor, 2):\\n if n % d == 0:\\n return False\\n return True\\n\\ndef nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(nth_prime(100))\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 100", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "th prime", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " number is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 541", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "metric": "prompt_tokens", + "unit": null, + "value": 243 + }, + { + "metric": "completion_tokens", + "unit": null, + "value": 20 + }, + { + "metric": "total_tokens", + "unit": null, + "value": 263 + } + ] + } + } + ], + "type": "generator" + }, "[[\"meta-llama/Llama-3.3-70B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { "chunks": [ { @@ -38485,7 +60622,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "def is_prime(n):\n if n <= 1:\n return False", + "tool_call": "def is", "type": "tool_call" }, "event_type": { @@ -38510,7 +60647,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "\n if n <= 3:\n return True", + "tool_call": "_prime(n", "type": "tool_call" }, "event_type": { @@ -38535,7 +60672,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "\n if n % 2 == 0 or n % 3", + "tool_call": "):\n", "type": "tool_call" }, "event_type": { @@ -38560,7 +60697,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " == 0:\n return False\n i = 5\n ", + "tool_call": " ", "type": "tool_call" }, "event_type": { @@ -38585,7 +60722,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " while i * i <= n:\n if n", + "tool_call": " if n", "type": "tool_call" }, "event_type": { @@ -38610,7 +60747,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " % i == 0 or n % (i", + "tool_call": " <= ", "type": "tool_call" }, "event_type": { @@ -38635,7 +60772,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " + 2) == 0:\n return False\n i +=", + "tool_call": "1", "type": "tool_call" }, "event_type": { @@ -38660,7 +60797,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " 6\n return True\n\ndef nth_prime(n):\n count =", + "tool_call": ":\n ", "type": "tool_call" }, "event_type": { @@ -38685,7 +60822,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " 0\n num = 2\n while True:\n if", + "tool_call": " return False", "type": "tool_call" }, "event_type": { @@ -38710,7 +60847,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": " is_prime(num):\n count += 1\n if count == n", + "tool_call": "\n ", "type": "tool_call" }, "event_type": { @@ -38735,7 +60872,7 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": ":\n return num\n num += 1\n\nprint(nth_prime", + "tool_call": " if n", "type": "tool_call" }, "event_type": { @@ -38760,7 +60897,1707 @@ "__module__": "llama_stack.apis.common.content_types", "value": "in_progress" }, - "tool_call": "(100))", + "tool_call": " == ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "2:\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return True", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " % ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "2 ==", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " False", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " max_div", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "isor", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " =", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " int(n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "**0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "5)", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " + ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "1\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " for", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " d in", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " range(", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "3,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " max_div", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "isor,", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 2", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "):\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " % d", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " == ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "0:\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " False", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " True\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "def nth", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_prime(n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "):\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " count", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " =", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 0", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " num =", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 2", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " while True", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if is", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_prime(num", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "):\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " count +=", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 1", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if count", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " == n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return num", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " num +=", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 1", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(nth", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_prime(", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "100))", "type": "tool_call" }, "event_type": { @@ -38787,9 +62624,9 @@ }, "tool_call": { "arguments": { - "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(nth_prime(100))" + "code": "def is_prime(n):\n if n <= 1:\n return False\n if n == 2:\n return True\n if n % 2 == 0:\n return False\n max_divisor = int(n**0.5) + 1\n for d in range(3, max_divisor, 2):\n if n % d == 0:\n return False\n return True\n\ndef nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(nth_prime(100))" }, - "call_id": "e1110bc1-dc83-480d-ad33-09d49f5ccc8d", + "call_id": "67e0a41c-4428-47a9-b276-df436c014992", "tool_name": { "__enum__": "BuiltinTool", "__module__": "llama_stack.models.llama.datatypes", @@ -38836,54 +62673,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "5J3hM-La", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:09.121100+00:00", - "__module__": "datetime" - }, - "trace_id": "snO106yxStaL10ow", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 40 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "5J3hM-La", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:09.121127+00:00", - "__module__": "datetime" - }, - "trace_id": "snO106yxStaL10ow", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 10 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "5J3hM-La", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:44:09.121132+00:00", - "__module__": "datetime" - }, - "trace_id": "snO106yxStaL10ow", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 50 } ] @@ -38940,7 +62741,107 @@ "data": { "event": { "delta": { - "text": "plexity the company was founded in 2022.", + "text": "plexity", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the company", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " was founded", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " in ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "2022", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".", "type": "text" }, "event_type": { @@ -38977,54 +62878,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "6jxCq3gU", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:50.430436+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 68 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "6jxCq3gU", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:50.430477+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 22 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "6jxCq3gU", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:50.430489+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 90 } ] @@ -39081,7 +62946,7 @@ "data": { "event": { "delta": { - "text": "nowledge_search(query=\"Perplexity the company", + "text": "nowledge_search", "type": "text" }, "event_type": { @@ -39101,7 +62966,87 @@ "data": { "event": { "delta": { - "text": " founding date\")]", + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Perplex", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ity the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " company founding", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " date\")]", "type": "text" }, "event_type": { @@ -39130,7 +63075,7 @@ "arguments": { "query": "Perplexity the company founding date" }, - "call_id": "199ef050-bc11-4e4b-935d-f5241c3f40ef", + "call_id": "1d69f6cc-160a-47d8-a452-4deb322954fd", "tool_name": "knowledge_search" }, "type": "tool_call" @@ -39173,54 +63118,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "m4wMGuSN", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:49.880525+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 29 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "m4wMGuSN", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:49.880576+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 23 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "m4wMGuSN", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:49.880585+00:00", - "__module__": "datetime" - }, - "trace_id": "XhZWljYTTDCYF7vI", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 52 } ] @@ -39277,7 +63186,7 @@ "data": { "event": { "delta": { - "text": " NBA was created on August 3, 1949, with", + "text": " NBA was", "type": "text" }, "event_type": { @@ -39297,7 +63206,7 @@ "data": { "event": { "delta": { - "text": " the merger of the Basketball Association of America (BAA) and the National", + "text": " created on", "type": "text" }, "event_type": { @@ -39317,7 +63226,327 @@ "data": { "event": { "delta": { - "text": " Basketball League (NBL).", + "text": " August ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "3", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "1949", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", with", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the merger", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Basketball Association", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of America", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " (B", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "AA)", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " National", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Basketball League", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " (N", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "BL).", "type": "text" }, "event_type": { @@ -39354,54 +63583,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "OyfVMRgR", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:53.322420+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 63 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "OyfVMRgR", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:53.322482+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 45 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "OyfVMRgR", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:53.322490+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 108 } ] @@ -39458,7 +63651,87 @@ "data": { "event": { "delta": { - "text": "nowledge_search(query=\"NBA creation date\")]", + "text": "nowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "(query=\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "NBA", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " creation date", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\")]", "type": "text" }, "event_type": { @@ -39487,7 +63760,7 @@ "arguments": { "query": "NBA creation date" }, - "call_id": "388e55ab-448a-4a98-905b-196c051bdeea", + "call_id": "bac8b49d-537e-4c73-bb9e-c06475903366", "tool_name": "knowledge_search" }, "type": "tool_call" @@ -39530,54 +63803,18 @@ }, "metrics": [ { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "prompt_tokens", - "span_id": "QpFMmy3B", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:52.235138+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 27 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "completion_tokens", - "span_id": "QpFMmy3B", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:52.235160+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 20 }, { - "attributes": { - "model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_id": "fireworks" - }, "metric": "total_tokens", - "span_id": "QpFMmy3B", - "timestamp": { - "__class__": "datetime", - "__datetime__": "2025-03-07T01:45:52.235165+00:00", - "__module__": "datetime" - }, - "trace_id": "TMrhR55CR-KrmGp0", - "type": "metric", - "unit": "tokens", + "unit": null, "value": 47 } ] diff --git a/tests/integration/fixtures/recorded_responses/invoke_tool.json b/tests/integration/fixtures/recorded_responses/invoke_tool.json index 76191e992..f52e34333 100644 --- a/tests/integration/fixtures/recorded_responses/invoke_tool.json +++ b/tests/integration/fixtures/recorded_responses/invoke_tool.json @@ -25,6 +25,19 @@ } } }, + "[[], {\"kwargs\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return False\\n if n == 2:\\n return True\\n if n % 2 == 0:\\n return False\\n max_divisor = int(n**0.5) + 1\\n for d in range(3, max_divisor, 2):\\n if n % d == 0:\\n return False\\n return True\\n\\ndef nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(nth_prime(100))\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { @@ -64,6 +77,19 @@ } } }, + "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the shape of the dataframe (number of rows and columns)\\nprint(df.shape)\\n\\n# Print the column names\\nprint(df.columns)\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print a summary of the dataframe (count, mean, std, min, 25%, 50%, 75%, max)\\nprint(df.describe())\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\ndf = pd.read_csv(\\\"\")\\nprint(df.head())\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { @@ -129,6 +155,19 @@ } } }, + "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Convert the 'Year' column to datetime\\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\\n\\n# Group by 'Year' and calculate the average inflation\\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\\n\\n# Plot the average inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Inflation')\\nplt.grid(True)\\nplt.show()\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Convert the 'Year' column to datetime\\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\\n\\n# Group by 'Year' and calculate the average inflation\\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\\n\\n# Plot the average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Inflation')\\nplt.grid(True)\\nplt.show()\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { @@ -374,23 +413,23 @@ "type": "text" }, { - "text": "Result 1:\nDocument_id:24443\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\nlook like so:\n\n.. code-block:: python\n\n from torchtune.datasets import chat_dataset\n from torchtune.models.llama3 import llama3_tokenizer\n\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\n ds = chat_dataset(\n tokenizer=tokenizer,\n source=\"json\",\n data_files=\"data/my_data.json\",\n split=\"train\",\n conversation_column=\"dialogue\",\n conversation_style=\"sharegpt\",\n )\n\n.. code-block:: yaml\n\n # In config\n tokenizer:\n _component_: torchtune.models.llama3.llama3_tokenizer\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\n\n dataset:\n _component_: torchtune.datasets.chat_dataset\n source: json\n data_files: data/my_data.json\n split: train\n conversation_column: dialogue\n conversation_style: sharegpt\n\n.. note::\n You can pass in any keyword argument for `load_dataset `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `.\n.. .. _glossary_fsdp2:\n\n", + "text": "Result 3:\nDocument_id:122a9\nContent: ` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", "type": "text" }, { - "text": "Result 4:\nDocument_id:961ff\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "text": "Result 4:\nDocument_id:700ad\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", "type": "text" }, { - "text": "Result 5:\nDocument_id:b49f7\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", + "text": "Result 5:\nDocument_id:122a9\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", "type": "text" }, { @@ -402,11 +441,11 @@ "error_message": null, "metadata": { "document_ids": [ - "24443dfb-a0b3-4ce8-820e-3fb1f12364bb", - "961ff2d1-8887-41ef-a4fe-fa4cbab7b932", - "b49f7985-6615-4dcf-99be-d1765b6a6fc6", - "961ff2d1-8887-41ef-a4fe-fa4cbab7b932", - "b49f7985-6615-4dcf-99be-d1765b6a6fc6" + "895539dd-a627-4c02-94d7-6591cd0ce00f", + "700ad5a6-e318-48ad-99b2-93934c5d7f8c", + "122a966a-6d33-4482-87a6-f5d16e9f92be", + "700ad5a6-e318-48ad-99b2-93934c5d7f8c", + "122a966a-6d33-4482-87a6-f5d16e9f92be" ] } } @@ -418,7 +457,7 @@ "__module__": "llama_stack.apis.tools.tools", "__pydantic__": "ToolInvocationResult", "data": { - "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Meta - Leadership & Governance\", \"url\": \"https://investor.atmeta.com/leadership-and-governance/\", \"content\": \"Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. Mark is responsible for setting the overall direction and product strategy for the company. He leads the design of Meta's services and development of its core technology and infrastructure. Mark studied computer science at Harvard\", \"score\": 0.8342047, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"The 11 People Running Meta's $1 Trillion Social Media and ... - Observer\", \"url\": \"https://observer.com/2024/01/meta-facebook-top-executives/\", \"content\": \"Meta has one of the most stable leadership team in the tech industry. Almost all of Meta's top executives have been with the company for well over a decade. ... 39, cofounder, chairman and CEO\", \"score\": 0.45536873, \"raw_content\": null}, {\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Meta leadership: images of senior executives for download to use in articles about the company.\", \"score\": 0.21026355, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/Mark_Zuckerberg\", \"content\": \"They began dating in 2003.[175] In September 2010, Chan, who was a medical student at the University of California, San Francisco at the time,[176] moved into his rented house in Palo Alto, California.[177][178] They married on May 19, 2012, in the grounds of his mansion in an event that also celebrated her graduation from medical school.[179][180] Zuckerberg revealed in July 2015 that they were expecting a baby girl and that Chan had previously experienced three miscarriages.[181] Their first daughter was born in December 2015.[182] They announced in a Chinese New Year video that their daughter's Chinese name is Chen Mingyu (Chinese: \\u9648\\u660e\\u5b87).[183] Their second daughter was born in August 2017.[184] Zuckerberg and his wife welcomed their third daughter in March 2023 and announced the news across his social media pages.[185] The couple also have a Puli dog named Beast,[186] who has over two million followers on Facebook.[187] Zuckerberg commissioned the visual artist Daniel Arsham to build a 7-foot-tall sculpture of his wife, which was unveiled in 2024.[188]\", \"score\": 0.05564338, \"raw_content\": null}]}", + "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\u2018Boz\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79080546, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\u2019s finance and facilities team to keep track of the company\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta - Leadership & Governance\", \"url\": \"https://investor.atmeta.com/leadership-and-governance/\", \"content\": \"Mr. Andreessen was a co-founder of Netscape Communications Corporation, a software company, serving in various positions, including Chief Technology Officer and Executive Vice President of Products. Ms. Killefer also served as Assistant Secretary for Management, Chief Financial Officer, and Chief Operating Officer of the U.S. Department of the Treasury from 1997 to 2000 and as a member of the IRS Oversight Board from 2000 to 2005, including as Chair of the IRS Oversight Board from 2002 to 2004. Ms. Travis has served as Executive Vice President and Chief Financial Officer of The Estee Lauder Companies Inc., a global manufacturer and marketer of skin care, makeup, fragrance and hair care products, since August 2012.\", \"score\": 0.6175132, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/Mark_Zuckerberg\", \"content\": \"They began dating in 2003.[175] In September 2010, Chan, who was a medical student at the University of California, San Francisco at the time,[176] moved into his rented house in Palo Alto, California.[177][178] They married on May 19, 2012, in the grounds of his mansion in an event that also celebrated her graduation from medical school.[179][180] Zuckerberg revealed in July 2015 that they were expecting a baby girl and that Chan had previously experienced three miscarriages.[181] Their first daughter was born in December 2015.[182] They announced in a Chinese New Year video that their daughter's Chinese name is Chen Mingyu (Chinese: \\u9648\\u660e\\u5b87).[183] Their second daughter was born in August 2017.[184] Zuckerberg and his wife welcomed their third daughter in March 2023 and announced the news across his social media pages.[185] The couple also have a Puli dog named Beast,[186] who has over two million followers on Facebook.[187] Zuckerberg commissioned the visual artist Daniel Arsham to build a 7-foot-tall sculpture of his wife, which was unveiled in 2024.[188]\", \"score\": 0.05570498, \"raw_content\": null}]}", "error_code": null, "error_message": null, "metadata": null @@ -437,23 +476,23 @@ "type": "text" }, { - "text": "Result 1:\nDocument_id:20e5d\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", + "text": "Result 2:\nDocument_id:700ad\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. note::\n\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", "type": "text" }, { - "text": "Result 3:\nDocument_id:20e5d\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "text": "Result 3:\nDocument_id:700ad\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", "type": "text" }, { - "text": "Result 4:\nDocument_id:20e5d\nContent: from our Llama2\nmodel without any wrappers or custom checkpoint conversion logic.\n\n.. code-block:: python\n\n # Assuming that base_model already has the pretrained Llama2 weights,\n # this will directly load them into your LoRA model without any conversion necessary.\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\n\n.. note::\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\n :func:`validate_missing_and_unexpected_for_lora() `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe