From 3d71e5a03695715e12852de43ce787ef3420863b Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Thu, 6 Mar 2025 14:46:29 -0800
Subject: [PATCH] test: recordable mocks use json only (#1443)

# Summary:
removes the use of pickle

# Test Plan:
Run the following with `--record-responses` first, then another time without.

LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct
---
 tests/integration/fixtures/common.py          |     2 +-
 tests/integration/fixtures/recordable_mock.py |   172 +-
 .../recorded_responses/chat_completion.json   | 49050 ++++++++--------
 .../recorded_responses/chat_completion.pickle | Bin 888589 -> 0 bytes
 .../recorded_responses/invoke_tool.json       |  1003 +-
 .../recorded_responses/invoke_tool.pickle     | Bin 67524 -> 0 bytes
 6 files changed, 23792 insertions(+), 26435 deletions(-)
 delete mode 100644 tests/integration/fixtures/recorded_responses/chat_completion.pickle
 delete mode 100644 tests/integration/fixtures/recorded_responses/invoke_tool.pickle

diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index a30f85076..6a75b3adf 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -59,7 +59,7 @@ def llama_stack_client_with_mocked_inference(llama_stack_client, request):
         return llama_stack_client

     record_responses = request.config.getoption("--record-responses")
-    cache_dir = Path(__file__).parent / "fixtures" / "recorded_responses"
+    cache_dir = Path(__file__).parent / "recorded_responses"

     # Create a shallow copy of the client to avoid modifying the original
     client = copy.copy(llama_stack_client)
diff --git a/tests/integration/fixtures/recordable_mock.py b/tests/integration/fixtures/recordable_mock.py
index d8704a0d5..d71426336 100644
--- a/tests/integration/fixtures/recordable_mock.py
+++ b/tests/integration/fixtures/recordable_mock.py
@@ -3,10 +3,12 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import importlib
 import json
 import os
-import pickle
 import re
+from datetime import datetime
+from enum import Enum
 from pathlib import Path


@@ -15,18 +17,18 @@ class RecordableMock:

     def __init__(self, real_func, cache_dir, func_name, record=False):
         self.real_func = real_func
-        self.pickle_path = Path(cache_dir) / f"{func_name}.pickle"
         self.json_path = Path(cache_dir) / f"{func_name}.json"
         self.record = record
         self.cache = {}

         # Load existing cache if available and not recording
-        if self.pickle_path.exists():
+        if self.json_path.exists():
             try:
-                with open(self.pickle_path, "rb") as f:
-                    self.cache = pickle.load(f)
+                with open(self.json_path, "r") as f:
+                    self.cache = json.load(f)
             except Exception as e:
-                print(f"Error loading cache from {self.pickle_path}: {e}")
+                print(f"Error loading cache from {self.json_path}: {e}")
+                raise

     async def __call__(self, *args, **kwargs):
         """
@@ -98,23 +100,19 @@ class RecordableMock:
         # Check if it's a value or chunks
         if cached_data.get("type") == "value":
             # It's a regular value
-            return cached_data["value"]
+            return self._reconstruct_object(cached_data["value"])
         else:
             # It's chunks from an async generator
             async def replay_generator():
                 for chunk in cached_data["chunks"]:
-                    yield chunk
+                    yield self._reconstruct_object(chunk)

             return replay_generator()

     def _create_cache_key(self, args, kwargs):
         """Create a hashable key from the function arguments, ignoring auto-generated IDs."""
-        # Convert args and kwargs to a string representation directly
-        args_str = str(args)
-        kwargs_str = str(sorted([(k, kwargs[k]) for k in kwargs]))
-
-        # Combine into a single key
-        key = f"{args_str}_{kwargs_str}"
+        # Convert to JSON strings with sorted keys
+        key = json.dumps((args, kwargs), sort_keys=True, default=self._json_default)

         # Post-process the key with regex to replace IDs with placeholders
         # Replace UUIDs and similar patterns
@@ -126,83 +124,95 @@ class RecordableMock:
         return key

     def _save_cache(self):
-        """Save the cache to disk in both pickle and JSON formats."""
-        os.makedirs(self.pickle_path.parent, exist_ok=True)
+        """Save the cache to disk in JSON format."""
+        os.makedirs(self.json_path.parent, exist_ok=True)

-        # Save as pickle for exact object preservation
-        with open(self.pickle_path, "wb") as f:
-            pickle.dump(self.cache, f)
-
-        # Also save as JSON for human readability and diffing
+        # Write the JSON file with pretty formatting
         try:
-            # Create a simplified version of the cache for JSON
-            json_cache = {}
-            for key, value in self.cache.items():
-                if value.get("type") == "generator":
-                    # For generators, create a simplified representation of each chunk
-                    chunks = []
-                    for chunk in value["chunks"]:
-                        chunk_dict = self._object_to_json_safe_dict(chunk)
-                        chunks.append(chunk_dict)
-                    json_cache[key] = {"type": "generator", "chunks": chunks}
-                else:
-                    # For values, create a simplified representation
-                    val = value["value"]
-                    val_dict = self._object_to_json_safe_dict(val)
-                    json_cache[key] = {"type": "value", "value": val_dict}
-
-            # Write the JSON file with pretty formatting
             with open(self.json_path, "w") as f:
-                json.dump(json_cache, f, indent=2, sort_keys=True)
+                json.dump(self.cache, f, indent=2, sort_keys=True, default=self._json_default)
+                # write another empty line at the end of the file to make pre-commit happy
+                f.write("\n")
         except Exception as e:
             print(f"Error saving JSON cache: {e}")

-    def _object_to_json_safe_dict(self, obj):
-        """Convert an object to a JSON-safe dictionary."""
-        # Handle enum types
-        if hasattr(obj, "value") and hasattr(obj.__class__, "__members__"):
-            return {"__enum__": obj.__class__.__name__, "value": obj.value}
+    def _json_default(self, obj):
+        """Default function for JSON serialization of objects."""
+
+        if isinstance(obj, datetime):
+            return {
+                "__datetime__": obj.isoformat(),
+                "__module__": obj.__class__.__module__,
+                "__class__": obj.__class__.__name__,
+            }
+
+        if isinstance(obj, Enum):
+            return {
+                "__enum__": obj.__class__.__name__,
+                "value": obj.value,
+                "__module__": obj.__class__.__module__,
+            }

         # Handle Pydantic models
         if hasattr(obj, "model_dump"):
-            return self._process_dict(obj.model_dump())
-        elif hasattr(obj, "dict"):
-            return self._process_dict(obj.dict())
+            model_data = obj.model_dump()
+            return {
+                "__pydantic__": obj.__class__.__name__,
+                "__module__": obj.__class__.__module__,
+                "data": model_data,
+            }

-        # Handle regular objects with __dict__
-        try:
-            return self._process_dict(vars(obj))
-        except Exception as e:
-            print(f"Error converting object to JSON-safe dict: {e}")
-            # If we can't get a dict, convert to string
-            return str(obj)
+    def _reconstruct_object(self, data):
+        """Reconstruct an object from its JSON representation."""
+        if isinstance(data, dict):
+            # Check if this is a serialized datetime
+            if "__datetime__" in data:
+                try:
+                    module_name = data.get("__module__", "datetime")
+                    class_name = data.get("__class__", "datetime")

-    def _process_dict(self, d):
-        """Process a dictionary to make all values JSON-safe."""
-        if not isinstance(d, dict):
-            return d
+                    # Try to import the specific datetime class
+                    module = importlib.import_module(module_name)
+                    dt_class = getattr(module, class_name)

-        result = {}
-        for k, v in d.items():
-            if isinstance(v, dict):
-                result[k] = self._process_dict(v)
-            elif isinstance(v, list):
-                result[k] = [
-                    self._process_dict(item)
-                    if isinstance(item, dict)
-                    else self._object_to_json_safe_dict(item)
-                    if hasattr(item, "__dict__")
-                    else item
-                    for item in v
-                ]
-            elif hasattr(v, "value") and hasattr(v.__class__, "__members__"):
-                # Handle enum
-                result[k] = {"__enum__": v.__class__.__name__, "value": v.value}
-            elif hasattr(v, "__dict__"):
-                # Handle nested objects
-                result[k] = self._object_to_json_safe_dict(v)
-            else:
-                # Basic types
-                result[k] = v
+                    # Parse the ISO format string
+                    dt = dt_class.fromisoformat(data["__datetime__"])
+                    return dt
+                except (ImportError, AttributeError, ValueError) as e:
+                    print(f"Error reconstructing datetime: {e}")
+                    return data

-        return result
+            # Check if this is a serialized enum
+            elif "__enum__" in data:
+                try:
+                    module_name = data.get("__module__", "builtins")
+                    enum_class = self._import_class(module_name, data["__enum__"])
+                    return enum_class(data["value"])
+                except (ImportError, AttributeError) as e:
+                    print(f"Error reconstructing enum: {e}")
+                    return data
+
+            # Check if this is a serialized Pydantic model
+            elif "__pydantic__" in data:
+                try:
+                    module_name = data.get("__module__", "builtins")
+                    model_class = self._import_class(module_name, data["__pydantic__"])
+                    return model_class(**self._reconstruct_object(data["data"]))
+                except (ImportError, AttributeError) as e:
+                    print(f"Error reconstructing Pydantic model: {e}")
+                    return data
+
+            # Regular dictionary
+            return {k: self._reconstruct_object(v) for k, v in data.items()}
+
+        # Handle lists
+        elif isinstance(data, list):
+            return [self._reconstruct_object(item) for item in data]
+
+        # Return primitive types as is
+        return data
+
+    def _import_class(self, module_name, class_name):
+        """Import a class from a module."""
+        module = __import__(module_name,
fromlist=[class_name]) + return getattr(module, class_name) diff --git a/tests/integration/fixtures/recorded_responses/chat_completion.json b/tests/integration/fixtures/recorded_responses/chat_completion.json index 9e70e3df0..e19cd8ba3 100644 --- a/tests/integration/fixtures/recorded_responses/chat_completion.json +++ b/tests/integration/fixtures/recorded_responses/chat_completion.json @@ -1,26140 +1,23384 @@ { - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100'), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'false'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. 
\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"false\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100 degrees Fahrenheit.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - 
"logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100'), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'false'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100 degrees", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Fahrenheit.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. 
'), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"get_boiling_point", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\", \"parameters\": {\"liquid_name\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"polyjuice\", \"cel", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "cius\": \"false\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": { - "arguments": { - "celcius": "false", - "liquid_name": "polyjuice" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " degrees Fahrenheit.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "dc0f86d3-2b7a-45b0-8e58-8f49c9942190", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. 
'), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get_boiling_point\", \"parameters", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\": {\"liquid_name\": \"polyjuice\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "celcius\": \"false\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "false", - "liquid_name": "polyjuice" + "metric": "prompt_tokens", + "span_id": "9ksjMloe", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:58.345129+00:00", + "__module__": "datetime" }, - "call_id": "00c0968b-d7d4-450d-a6ff-03d64ae9f772", - "tool_name": "get_boiling_point" + "trace_id": "6aGYLk4UShyrQ7uz", + "type": "metric", + "unit": "tokens", + "value": 139 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - 
"__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get_bo", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "iling_point\", \"parameters\": {\"liquid_name\": \"poly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "juice\", \"celcius\": \"true\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "true", - "liquid_name": "polyjuice" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "510ca34b-5ba9-4d5f-9ff3-c56de756fc95", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - 
"event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. '), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_point\", \"parameters\": {\"liquid_name\": \"polyjuice", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\", \"celcius\": \"true\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "true", - "liquid_name": "polyjuice" + "metric": "completion_tokens", + "span_id": "9ksjMloe", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:58.345170+00:00", + "__module__": "datetime" }, - "call_id": "eda85f20-da80-4e11-a0e4-3849159ae70f", - "tool_name": "get_boiling_point" + "trace_id": "6aGYLk4UShyrQ7uz", + "type": "metric", + "unit": "tokens", + "value": 23 }, 
- "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100\u00b0C.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, 
repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100\u00b0C.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point_with_metadata', arguments={'liquid_name': 'polyjuice', 'celcius': 'true'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point_with_metadata', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point_with_metadata', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100\u00b0C.", - "type": "text" - }, - "event_type": { 
- "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\", \"parameters\": {\"liquid_name\": \"", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "polyjuice\", \"celcius\": \"true\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "true", - "liquid_name": "polyjuice" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "ac699f8a-43ca-4f0b-abd4-0597722b42ee", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - 
"__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": \"get_boiling_point\", \"parameters", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": {\"liquid_name\": \"poly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "juice\", \"celcius\": \"true\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - 
"__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "true", - "liquid_name": "polyjuice" + "metric": "total_tokens", + "span_id": "9ksjMloe", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:58.345177+00:00", + "__module__": "datetime" }, - "call_id": "8b8b3ad5-5e47-4f56-a823-e2d82fa72d9c", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "6aGYLk4UShyrQ7uz", + "type": "metric", + "unit": "tokens", + "value": 162 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point_with_metadata', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. 
\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" }, - "tool_call": "", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { 
- "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"get_boiling_point", + "type": "text" }, - "tool_call": "{\"type\": \"function\", \"name\": \"", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\", \"parameters\": {\"liquid_name\":", + "type": "text" }, - "tool_call": "get_boiling_point_with_metadata\", \"", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \"polyjuice\", \"celcius\": \"false\"}}", + "type": "text" }, - "tool_call": "parameters\": {\"liquid_name\": \"poly", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "juice\", \"celcius\": \"true\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": "true", - "liquid_name": "polyjuice" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" }, - "call_id": "3438f2d7-895f-4a94-8e1f-c2f01860ce88", - "tool_name": "get_boiling_point_with_metadata" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - 
"metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Give me a sentence that contains the word: hello', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " customer smiled and said \"hello\" to the friendly store clerk", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\ndf.head()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.info())\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', 
SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the `bwrap.core` module", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is not found. This is likely because the `bwrap` package is not installed", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". 
To fix this, you can install the `bwrap", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "` package using pip:\n\n```\npip install bwrap\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "```\n\nHowever, since the `bwrap", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "` package is not a real package, you can ignore this error", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and continue with the code.\n\nThe code above will print a summary of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the CSV file, including the number of non-null values in each column", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", the data types of each column, and a summary of the central", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " tendency and dispersion of each numeric column.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\ndf.head()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, 
repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/c", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "z/vyh7y1d11xg881lsxsshnc5", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "c0000gn/T/tmpkbnyoruj/fzDfY", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "IPeinflation.csv\")\nprint(df.info())\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "print(df.describe())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpkbnyoruj/fzDfYIPeinflation.csv\")\nprint(df.info())\nprint(df.describe())" + "tool_call": { + "arguments": { + "celcius": "false", + "liquid_name": "polyjuice" + }, + "call_id": "55492018-ad19-4593-9171-2b5dc2089960", + "tool_name": "get_boiling_point" }, - "call_id": 
"3fb76365-1f1f-4d06-a7d2-970ad7108e2b", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the `b", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "wrap.core` module is not found", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". This is likely because the `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "bwrap` package is not installed", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". 
To fix this, you can install the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `bwrap` package using pip:\n\n```\npip install", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " bwrap\n```\n\nHowever, if", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you don't have the `bwrap` package installed,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you can't use the `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "b", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "wrap.core` module.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " In this case, you can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " try to load the CSV file using the `p", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "andas` library directly.\n\nHere is the corrected code:\n\n```", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "python\nimport pandas as pd\ndf = pd.read_csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(\"/var/folders/cz/vyh7y1d11x", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "g881lsxsshnc5c000", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "0gn/T/tmp8d5c", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - 
"stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "8spc/zOZSE5", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "zcinflation.csv\")\nprint(df.head())\nprint(df.info())\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "print(df.describe())\n```\n\nThis code will", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " load the CSV file and print the first few rows, information about", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the data, and summary statistics.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, 
default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the `bwrap.core` module", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is not found. This is likely", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " because the `bwrap` package is not installed. To fix this", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", you can install the `bwrap` package using pip:\n\n```\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "pip install bwrap\n```\n\nHowever, if you don't have", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " permission to install packages, you can use the `knowledge_search` function to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " get information about the CSV file instead:\n\n```\n{\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"type\": \"function\",\n \"name\": \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "knowledge_search\",\n \"parameters\": {\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"query\": \"describe a csv file\"\n }\n}\n``", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": 
null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "`\n\nThis will return a description of the CSV file.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": "", - "type": "tool_call" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/c", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" }, - "tool_call": "z/vyh7y1d11xg881lsxsshnc", - "type": "tool_call" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "5c0000gn/T/tmp8d5c8spc", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/zOZSE5zcinflation.csv\")\nprint(df.head())\nprint", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(df.info())\nprint(df.describe())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - 
"tool_call": { - "arguments": { - "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmp8d5c8spc/zOZSE5zcinflation.csv\")\nprint(df.head())\nprint(df.info())\nprint(df.describe())" + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "09b4d9a1-8ee4-4de4-a5a3-91cad464e668", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\ndf = pd.read", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_csv(\"/var/folders/cz/v", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "yh7y1d11xg881", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "lsxsshnc5c0000gn/T/tmpn9tl", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "gts1/qYsQ3ZJLinflation.csv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\")\nprint(df.head())\nprint(df.info())\nprint(df.describe())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpn9tlgts1/qYsQ3ZJLinflation.csv\")\nprint(df.head())\nprint(df.info())\nprint(df.describe())" + "metric": "prompt_tokens", + "span_id": "vTzYAYfO", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:56.985637+00:00", + "__module__": "datetime" }, - "call_id": "6c3c4895-55a7-4083-b5d1-6ee42bcbe5fa", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "trace_id": "H8ytqaQLQXe6sEEJ", + "type": "metric", + "unit": "tokens", + "value": 91 }, - 
"type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m unable to access the file you provided. However, I can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " suggest a general approach to describe a CSV file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".\n\nYou can use the pandas", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " library in Python to load and inspect the CSV", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file. Here's a general outline of the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " steps you can follow:\n\n1. Import the pandas library:", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `import pandas as pd`\n2. Load the CSV file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " into a dataframe: `df = pd.read_csv('file.csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "')`\n3. Print the first few rows", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " of the dataframe: `print(df.head())`\n4", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". 
Print the data types of each column", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ": `print(df.dtypes)`\n5", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Print the summary statistics of the dataframe:", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `print(df.describe())`\n\nThis will give you a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " general idea of the structure and content of the CSV file.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " If you need more specific information, you can use other pandas functions", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to inspect the dataframe.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, 
system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport code_interpreter\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Load the CSV file\ndf = pd.read_csv(\"/", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "var/folders/cz/vyh7y", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "1d11xg881lsxsshnc5c000", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0gn/T/tmpjxdo91ce/g1r3", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "WGZRinflation.csv\")\n\n# Print the first few rows of", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " the dataframe\nprint(df.head())\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": 
"progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Print the data types of each column", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\nprint(df.dtypes)\n\n# Print the summary statistics", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " of the dataframe\nprint(df.describe())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport code_interpreter\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpjxdo91ce/g1r3WGZRinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print the summary statistics of the dataframe\nprint(df.describe())" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "fbc1b233-207f-4f7b-8298-8d72a86d6f2c", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\ndf = pd.read_csv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(\"/var/folders/cz/vyh7y1d11x", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "g881lsxsshnc5c0000gn/T", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/tmp8d5c8spc/zOZSE5zcin", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation.csv\")\nprint(df.head())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmp8d5c8spc/zOZSE5zcinflation.csv\")\nprint(df.head())" + "metric": "completion_tokens", + "span_id": "vTzYAYfO", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:56.985707+00:00", + "__module__": "datetime" }, - "call_id": "c19a0d1e-6b44-408f-9839-819436425778", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "trace_id": "H8ytqaQLQXe6sEEJ", + "type": "metric", + "unit": "tokens", + "value": 45 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - 
"metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\ndf = pd.read_csv(\"/", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "var/folders/cz/vyh7", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "y1d11xg881lsxsshnc5c0000", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "gn/T/tmpn9tlgts1", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - 
"parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/qYsQ3ZJLin", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation.csv\")\nprint(df.head())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpn9tlgts1/qYsQ3ZJLinflation.csv\")\nprint(df.head())" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "e6c48b40-6504-4043-b3fa-644bd7fafd0f", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If the file is too large to be uploaded, you can provide a sample of the file or the code you used to create the file. 
\\n\\nHere is an example of how you can describe a csv file using pandas:\\n\\n```\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n# Print the first 5 rows of the data\\nprint(df.head())\\n# Print the last 5 rows of the data\\nprint(df.tail())\\n# Print the summary statistics of the data\\nprint(df.describe())\\n# Print the data types of each column\\nprint(df.dtypes)\\n```\\n\\nThis will give you an idea of what the csv file contains.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "This", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " code will create a line plot of the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " average yearly inflation over time. The x-axis", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " represents the year and the y-axis represents", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the average inflation. 
The plot will also", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " include a title, labels", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " for the x and y axes, and a grid to make it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " easier to read.\n\nPlease note that you need to replace '", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "inflation.csv' with the actual path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to your csv file. Also, this code assumes that the csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file has a column named 'date' and", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " another column named 'inflation'. 
If your csv file has", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " different column names, you need to adjust the code accordingly.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If the file is too large to be uploaded, you can provide a sample of the file or the code you used to create the file. 
\\n\\nHere is an example of how you can describe a csv file using pandas:\\n\\n```\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n# Print the first 5 rows of the data\\nprint(df.head())\\n# Print the last 5 rows of the data\\nprint(df.tail())\\n# Print the summary statistics of the data\\nprint(df.describe())\\n# Print the data types of each column\\nprint(df.dtypes)\\n```\\n\\nThis will give you an idea of what the csv file contains.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " data\ndf = pd.read_csv('inflation.csv')\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Convert 'date' column to datetime\ndf['", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "date'] = pd.to_datetime(df", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "['date'])\n\n# Group by year and", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": 
{ - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " calculate average inflation\naverage_inflation =", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " df.groupby(df['date'].dt.year)['inflation'].mean", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "()\n\n# Plot the time series\nplt.figure(figsize=(10,", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "6))\nplt.plot(average_inflation.index, average_inflation", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".values, marker='o')\nplt.title('Average Yearly In", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation')\nplt.xlabel('Year')\nplt.ylabel('Average In", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation')\nplt.grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert 'date' column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot the time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + "metric": "total_tokens", + "span_id": "vTzYAYfO", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:56.985718+00:00", + "__module__": "datetime" }, - "call_id": 
"6b6c11d8-75d5-4b34-b97b-ee523c7a8168", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "H8ytqaQLQXe6sEEJ", + "type": "metric", + "unit": "tokens", + "value": 136 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are running this code in a notebook, you can use the `upload` button to upload the file. 
If you are running this code in a script, you need to provide the file path.\\n\\nHere is an example of how you can describe the csv file if you have it in the same directory as your script:\\n\\n```python\\nimport pandas as pd\\n\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nThis will print the first few rows of the data, information about the data, and summary statistics about the data.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. 
\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "This", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " code will create a time series plot of the average yearly inflation.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " The x-axis represents the year", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and the y-axis represents the average inflation", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". The plot will show the trend of average yearly inflation over the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " years.\n\nPlease note that you need to replace 'inflation.csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "' with the actual path to your csv file. 
Also, this", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " code assumes that the csv file has a column named 'date'", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and another column named 'inflation'. If", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " your csv file has different column names, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " need to adjust the code accordingly.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are running this code in a notebook, you can use the `upload` button to upload the file. 
If you are running this code in a script, you need to provide the file path.\\n\\nHere is an example of how you can describe the csv file if you have it in the same directory as your script:\\n\\n```python\\nimport pandas as pd\\n\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nThis will print the first few rows of the data, information about the data, and summary statistics about the data.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": "", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " data\ndf = pd.read_csv('inflation.csv')\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Convert date column to datetime\ndf['date'] = pd.to", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_datetime(df['date'])\n\n# Group by year and calculate 
average", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " inflation\naverage_inflation = df.groupby(df['date'].", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "dt.year)['inflation'].mean()\n\n# Plot", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " time series\nplt.figure(figsize=(10,6))\nplt", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".plot(average_inflation.index, average_inflation.values, marker='", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "o')\nplt.title('Average Yearly Inflation')\nplt.xlabel", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "('Year')\nplt.ylabel('", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Average Inflation')\nplt.grid(True)\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "plt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert date column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = 
df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" }, - "call_id": "81d7a873-376b-438e-916d-d5454e6ed09e", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "tool_call": "", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a local file, you can use the `load_data` function from the `code_interpreter` library to load the file. \\n\\nHere is an example of how you can do it:\\n\\n```\\nimport pandas as pd\\nfrom code_interpreter import load_data\\n\\n# Load data\\ndf = load_data(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will load the csv file and print the first few rows, a summary of the data, and some descriptive statistics. \\n\\nPlease replace \\'inflation.csv\\' with the actual path to your csv file. \\n\\nIf you are using a remote file, you need to provide the actual file path or the file itself. 
\\n\\nPlease provide the actual file path or the file itself, and I will be happy to help you describe it.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv(\"inflation.csv\")\\n\\n# Convert date column to datetime\\ndf[\\'date\\'] = pd.to_datetime(df[\\'date\\'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df[\\'date\\'].dt.year)[\\'inflation\\'].mean()\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Average Inflation\\')\\nplt.grid(True)\\nplt.show()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "It", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " seems that the file \"inflation.csv\" does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \n\nTo plot the average yearly inflation as a time series, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " need to provide the actual file path or the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file itself. 
If you are using a local file, you can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use the `load_data` function from the `code_interpreter", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "` library to load the file. \n\nHere is an example of how", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you can do it:\n\n```\nimport pandas as pd\nfrom code_inter", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "preter import load_data\n\n# Load data\ndf", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " = load_data('inflation.csv')\n\n#", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Convert date column to datetime\ndf['date'] = pd.to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_datetime(df['date'])\n\n# Group by year", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and calculate average inflation\naverage_inflation = df.groupby(df['date", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'].dt.year)['inflation'].mean()\n\n# Plot time series\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "plt.figure(figsize=(10,6))\nplt.plot(average_inflation", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".index, average_inflation.values, marker", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "='o')\nplt.title('Average Yearly Inflation')\nplt", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - 
"stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".xlabel('Year')\nplt.ylabel('Average Inflation')\nplt", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".grid(True)\nplt.show()\n```\n\nThis", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " will load the csv file, convert the date column to datetime", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", group by year and calculate the average inflation, and then plot the time", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " series.\n\nPlease replace 'inflation.csv' with the actual path to your", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " csv file. \n\nIf you are using a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " remote file, you need to provide the actual file path or the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file itself. 
\n\nPlease provide the actual file path or the file itself,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and I will be happy to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " help you plot the average yearly inflation as a time series.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a local file, you can use the `load_data` function from the `code_interpreter` library to load the file. \\n\\nHere is an example of how you can do it:\\n\\n```\\nimport pandas as pd\\nfrom code_interpreter import load_data\\n\\n# Load data\\ndf = load_data(\\'inflation.csv\\')\\n\\n# Print summary of the data\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will load the csv file and print the first few rows, a summary of the data, and some descriptive statistics. \\n\\nPlease replace \\'inflation.csv\\' with the actual path to your csv file. \\n\\nIf you are using a remote file, you need to provide the actual file path or the file itself. 
\\n\\nPlease provide the actual file path or the file itself, and I will be happy to help you describe it.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": "", - "type": "tool_call" + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " plt\n\n# Load data\ndf = pd.read_csv(\"", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "inflation.csv\")\n\n# Convert date column to", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " datetime\ndf['date'] = pd.to_datetime(df['", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "date'])\n\n# Group by year and calculate average inflation\naverage_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation = 
df.groupby(df['date'].dt.year)['inflation", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "'].mean()\n\n# Plot time series\nplt.figure", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(figsize=(10,6))\nplt.plot", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(average_inflation.index, average_inflation.values, marker='", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "o')\nplt.title('Average Yearly Inflation')\nplt.xlabel", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ")\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv(\"inflation.csv\")\n\n# Convert date column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "da4cf054-6301-4408-85a8-35f15d1ff698", - "tool_name": { - "__enum__": "BuiltinTool", - 
"value": "code_interpreter" - } + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a remote server or a local machine, you can use the `pd.read_csv()` function to load the csv file. 
\\n\\nHere is an example:\\n\\n```python\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n# Print the first 5 rows of the dataframe\\nprint(df.head())\\n# Print the summary of the dataframe\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will print the first 5 rows of the dataframe, the summary of the dataframe (including the index dtype and column count), and the description of the dataframe (including count, mean, std, min, 25%, 50%, 75%, max for each column).', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "This", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " code will create a line plot of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the average yearly inflation over time. The x-axis", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " represents the year and the y-axis represents the average", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " inflation. 
The plot also includes a title, labels for the x", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and y axes, and a grid for", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " better visibility.\n\nPlease note that you need", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to replace 'inflation.csv' with the actual path to your", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " csv file. Also, this code assumes that the 'date", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "' column in your csv file is in a format that can be", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " parsed by pandas' `to_datetime` function. If your date", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " column is in a different format, you may need to specify the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " format when calling `to_datetime`.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent 
call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\"), CompletionMessage(role='assistant', content='It seems that the file \"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a remote server or a local machine, you can use the `pd.read_csv()` function to load the csv file. \\n\\nHere is an example:\\n\\n```python\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv(\\'inflation.csv\\')\\n# Print the first 5 rows of the dataframe\\nprint(df.head())\\n# Print the summary of the dataframe\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will print the first 5 rows of the dataframe, the summary of the dataframe (including the index dtype and column count), and the description of the dataframe (including count, mean, std, min, 25%, 50%, 75%, max for each column).', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": "", - "type": "tool_call" + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\ndf = pd.read_csv('inflation.csv')\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Convert 'date' column to datetime\ndf['date']", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": 
"ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " = pd.to_datetime(df['date'])\n\n# Group by", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " year and calculate average inflation\naverage_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation = df.groupby(df['date'].dt.year", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ")['inflation'].mean()\n\n# Plot the time series", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\nplt.figure(figsize=(10,6))\nplt.plot(average_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation.index, average_inflation.values, marker", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "='o')\nplt.title('Average Yearly Inflation')\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "plt.xlabel('Year')\nplt.ylabel('Average", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Inflation')\nplt.grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert 
'date' column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot the time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "65691869-f741-420c-bb73-23a1f8c0d82a", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "tool_call": "_point\", \"parameters\": {\"liquid_name\": \"polyjuice", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "It", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": 
"progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " seems that the file \"/var/f", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "olders/cz/vyh7y1d11", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "xg881lsxsshnc5c0000gn/T/tmp8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "d5c8spc/Q8Y9qzV", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Xinflation.csv\" does not exist", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". \n\nTo describe the csv file, you need to provide", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the actual file path or the file itself", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". If you are using a remote server or a local machine,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you can use the `pd.read_csv()` function to load the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " csv file. 
\n\nHere is an example:\n\n```python\nimport", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " pandas as pd\n# Load data\ndf", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " = pd.read_csv('inflation.csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "')\n# Print the first 5 rows of the dataframe\nprint", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(df.head())\n# Print the summary of the dataframe\nprint(df", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".info())\nprint(df.describe())\n```\n\nThis will print the first", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " 5 rows of the dataframe,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the summary of the dataframe (including the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " index dtype and column count), and the description of the dataframe", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " (including count, mean, std,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " min, 25%, 50%, 75%, max", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " for each column).", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - 
"('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": "", - "type": "tool_call" + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\n# Load data", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\ndf =", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " pd.read_csv(\"/var/folders/cz/vyh7", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "y1d11xg881lsx", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "sshnc5c0000gn/T", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - 
"tool_call": "/tmp8d5c8spc", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/Q8Y9qzVXinflation.csv\")\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "# Rows\nprint(\"Number of rows and columns in the data", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ":\", df.shape)\n# Columns\nprint(\"Columns of the data", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " are:\", len(df.columns))\n# Column names\nprint", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(\"Columns of the data are:\", df.columns)\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "# Column dtypes\nprint(\"Datatype of the columns are", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ":\", df.dtypes)", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmp8d5c8spc/Q8Y9qzVXinflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "15893b4c-5a55-4ea7-9902-8a2f28fa3659", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "tool_call": "\", \"celcius\": \"true\"}}", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:255c3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:3b16c\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:14b97\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:14b97\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:14b97\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you can follow these", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " steps:\n\n1. Install Torchtune and its dependencies", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".\n2. Download the Llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2 weights and tokenizer.\n3. Use the `l", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ora_llama2_7b` model in Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", which applies LoRA to the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Q and V projections by default.\n4.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Set the `lora_attn_modules` argument to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " apply LoRA to all linear", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " layers in the self-attention.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "5. 
Increase the rank and", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " alpha values to experiment with different LoRA", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " configurations.\n6. Run the LoRA finetuning", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " recipe in Torchtune using the `lora_finet", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "une_distributed` command.\n7.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Monitor the loss curves and adjust the Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA configuration as needed to trade off memory and model performance.\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "By following these steps, you can effectively use LoRA in", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Torchtune to fine-tune Llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2 models with a low memory footprint.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:255c3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:3b16c\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"knowledge_search\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "parameters\": {\"query\": \"How to use LoRA in", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Torchtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "How to use LoRA in Torchtune" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + 
"delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" }, - "call_id": "41f1d05b-cfca-4d54-a0de-38a968017c8b", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:255c3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:3b16c\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help you answer questions about Torchtune based", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " on the documentation you provided. 
What's your first question?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:292ee\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:2513e\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:47152\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:47152\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:47152\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you can follow these steps", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ":\n\n1. Install Torchtune and its dependencies.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2. Download the Llama2 weights and tokenizer.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "3. Use the `lora_llama2_", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "7b` model in Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", which applies LoRA to the Q", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and V projections by default.\n4", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Load the base model weights into the LoRA model without any", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " conversion necessary.\n5. Set only LoRA parameters to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " trainable.\n6. 
Run the LoRA finetuning recipe", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " in Torchtune with the desired configuration.\n\nYou", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can also experiment with different LoRA configurations, such as", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " applying LoRA to all linear layers in the self", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-attention, increasing the rank, or scaling alpha and rank", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " together.\n\nBy following these steps, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can use LoRA in Torchtune to fine-tune a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 model with a low memory footprint and achieve good", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " performance.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:292ee\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. 
For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. 
note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:2513e\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. 
_glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"knowledge_search\", \"parameters\": {\"query\": \"How to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "How to use LoRA in Torchtune" + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "6dd93d40-18ea-40c1-9e4d-78b3bd865e67", + "tool_name": "get_boiling_point" }, - "call_id": "5beb7c24-953b-4ad7-b834-a26522fb5ac7", - "tool_name": "knowledge_search" + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:292ee\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:2513e\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help you answer questions about Torchtune based", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " on the documentation you provided. What's your first question", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:ab1b9\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8bcf6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:cc646\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. 
When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:cc646\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:cc646\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. 
torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can follow these steps:\n\n1. Install Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and its dependencies.\n2. Download the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 weights and tokenizer.\n3", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". 
Use the `lora_llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2_7b` model in Torchtune, which", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " applies LoRA to the Q and V", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " projections by default.\n4. Load the base model weights into", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the LoRA model without any conversion necessary.\n5. Set", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " only LoRA parameters to trainable.\n6. Run the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA finetuning recipe in Torchtune with the desired", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " configuration.\n\nYou can also experiment with different LoRA configurations, such", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " as applying LoRA to all linear layers in the self-attention", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", increasing the rank, or scaling alpha and rank together.\n\nBy", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " following these steps, you can use LoRA in Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to fine-tune a Llama2", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " model with parameter-efficient finetuning and memory savings.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - 
"stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:ab1b9\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8bcf6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. 
code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"knowledge_search\", \"parameters", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\": {\"query\": \"How to use LoRA in Tor", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "chtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "How to use LoRA in Torchtune" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "5af3ef1f-98c0-4c60-9b8b-892b5e921040", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some 
documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:ab1b9\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8bcf6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help you answer questions about Torchtune based on", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the documentation you provided. What's your first question?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:c4b2d\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e37c3\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:606ad\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. 
When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:e37c3\\nContent: with training with LoRA quickly,\\njust specify any config with ``_lora`` in its name, e.g:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n\\nThere are two sets of parameters to customize LoRA to suit your needs. Firstly, the parameters which control\\nwhich linear layers LoRA should be applied to in the model:\\n\\n* ``lora_attn_modules: List[str]`` accepts a list of strings specifying which layers of the model to apply\\n LoRA to:\\n\\n * ``q_proj`` applies LoRA to the query projection layer.\\n * ``k_proj`` applies LoRA to the key projection layer.\\n * ``v_proj`` applies LoRA to the value projection layer.\\n * ``output_proj`` applies LoRA to the attention output projection layer.\\n\\n Whilst adding more layers to be fine-tuned may improve model accuracy,\\n this will come at the cost of increased memory usage and reduced training speed.\\n\\n* ``apply_lora_to_mlp: Bool`` applies LoRA to the MLP in each transformer layer.\\n* ``apply_lora_to_output: Bool`` applies LoRA to the model\\'s final output projection.\\n This is usually a projection to vocabulary space (e.g. in language models), but\\n other modelling tasks may have different projections - classifier models will project\\n to the number of classes, for example\\n\\n.. note::\\n\\n Models which use tied embeddings (such as Gemma and Qwen2 1.5B and 0.5B) for the\\n final output projection do not support ``apply_lora_to_output``.\\n\\nThese are all specified under the ``model`` flag or config entry, i.e:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"output_proj\"]\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.llama3.lora_llama3_8b\\n apply_lora_to_mlp: True\\n model.lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\",\"output_proj\"]\\n\\nSecondly, parameters which control the scale of the impact of LoRA on the model:\\n\\n* ``lora_rank: int`` affects the scale of\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:606ad\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:0b7ba\\nContent: ora_finetune_label>`.\\nFor more on QLoRA in torchtune, see our :ref:`QLoRA Tutorial `.\\n\\nLet\\'s take a look at how we can fine-tune Llama3-8B-Instruct with LoRA on a single device using torchtune. In this example, we will fine-tune\\nfor one epoch on a common instruct dataset for illustrative purposes. The basic command for a single-device LoRA fine-tune is\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n.. note::\\n To see a full list of recipes and their corresponding configs, simply run ``tune ls`` from the command line.\\n\\nWe can also add :ref:`command-line overrides ` as needed, e.g.\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n checkpointer.checkpoint_dir= \\\\\\n tokenizer.path=/tokenizer.model \\\\\\n checkpointer.output_dir=\\n\\nThis will load the Llama3-8B-Instruct checkpoint and tokenizer from ```` used in the :ref:`tune download ` command above,\\nthen save a final checkpoint in the same directory following the original format. For more details on the\\ncheckpoint formats supported in torchtune, see our :ref:`checkpointing deep-dive `.\\n\\n.. 
note::\\n To see the full set of configurable parameters for this (and other) configs we can use :ref:`tune cp ` to copy (and modify)\\n the default config. :ref:`tune cp ` can be used with recipe scripts too, in case you want to make more custom changes\\n that cannot be achieved by directly modifying existing configurable parameters. For more on :ref:`tune cp ` see the section on\\n :ref:`modifying configs ` in our \":ref:`finetune_llama_label`\" tutorial.\\n\\nOnce training is complete, the model checkpoints will be saved and their locations will be logged. For\\nLoRA fine-tuning, the final checkpoint will contain the merged weights, and a copy of just the (much smaller) LoRA weights\\nwill\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA, you can follow these steps", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ":\n\n1. Install the necessary packages, including torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and the Llama2 model.\n2.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Load the Llama2 model and specify which layers to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " apply LoRA to.\n3. 
Define the LoRA parameters", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", such as the rank and alpha values.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "4. Train the model using the LoRA fine-tuning", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " recipe in torchtune.\n5. Use the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " trained model for inference or further fine-tuning.\n\nHere is an", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " example of how to use LoRA with the Llama2 model", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ":\n\n```python\nfrom torchtune.models.llama2 import", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " llama2_7b, lora_llama2_7", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "b\n\n# Build Llama2 without any LoRA layers\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "base_model = llama2_7b()\n\n# The default settings for", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " lora_llama2_7b will match", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " those for llama2_7b\n# We just need to define", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " which layers we want LoRA applied to.\n# Within each self-", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": 
"attention, we can choose from [\"q_proj\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "k_proj\", \"v_proj\", and \"output_proj\"].\n# We", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can also set apply_lora_to_mlp=True or apply_lora", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_to_output=True to apply LoRA to other linear\n# layers outside", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " of the self-attention.\nlora_model = lora_llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2_7b(lora_attn_modules=[\"q_proj\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "v_proj\"])\n\n# Print the first layer's self-attention in the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " usual Llama2 model\nprint(base_model.layers[0].at", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "tn)\n# Print the same for Llama2 with LoRA weights", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\nprint(lora_model.layers[0].attn)\n```\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "This code loads the Llama2 model and applies LoRA to the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " query and value projection layers. 
You can modify the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lora_attn_modules` parameter to apply LoRA to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " different layers.\n\nTo train the model using the LoRA fine", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-tuning recipe in torchtune, you can use the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " following command:\n\n```bash\ntune run l", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ora_finetune_single_device --config llama3/", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "8B_lora_single_device\n```\n\nThis will train the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " model for one epoch on a common instruct", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " dataset. You can modify the command to change the training settings", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", such as the number of epochs or the batch size.\n\nAfter", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " training, you can use the trained model for inference or further fine", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-tuning. 
You can load the model using the `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "load_checkpoint` method and use it to make predictions or continue", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " training.\n\n```", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "out_of_tokens" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:c4b2d\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. 
This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e37c3\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"knowledge_search\", \"parameters", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\": {\"query\": \"How to use LoRA\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "How to use LoRA" + "metric": "prompt_tokens", + "span_id": "tBuntiC1", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:54.993737+00:00", + "__module__": "datetime" }, - "call_id": "8b617e66-08b4-4e93-8219-29b8b84c4672", - "tool_name": "knowledge_search" + "trace_id": "5SueXj79Q2e5n37g", + "type": "metric", + "unit": "tokens", + "value": 43 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:c4b2d\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. 
code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. 
code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e37c3\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "You", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can use the following function call to answer the user's question:\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"type\": \"function\", \"name\": \"knowledge_search\",", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"parameters\": {\"query\": \"How to fine-tune a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 model with LoRA in torchtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f3963\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. 
code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. 
code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e075f\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:0484f\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:0484f\\nContent: 06% of all params are trainable.\\n\\n.. 
note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:0484f\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. 
note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you can follow these steps", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ":\n\n1. Install Torchtune and its dependencies.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2. Download the Llama2 weights and tokenizer.\n3", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Use", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the `lora_llama2_7", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "b` model in Torchtune, which applies", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA to the Q and V projections by default", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".\n4. 
Load the base model weights into the LoRA", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " model without any conversion necessary.\n5.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Set only LoRA parameters", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to trainable.\n6.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Run the LoRA fin", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "etuning recipe in Torcht", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "une with the desired configuration.\n\nYou can also experiment", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " with different Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA configurations, such as applying LoRA to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " all linear layers in the self-attention, increasing the rank,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " or scaling alpha and rank together.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f3963\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e075f\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\": \"knowledge_search\", \"parameters\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " {\"query\": \"How to use Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA in Torchtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "How to use LoRA in Torchtune" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": 
"42e1de09-f47e-44b0-9331-9b878556970d", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f3963\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:e075f\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help you answer questions about", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Torchtune based on the documentation you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " provided. What's your first question?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. 
code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'How to use LoRA'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:cbc88\\nContent: .. 
_lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:8892b\\nContent: with training with LoRA quickly,\\njust specify any config with ``_lora`` in its name, e.g:\\n\\n.. 
code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n\\nThere are two sets of parameters to customize LoRA to suit your needs. Firstly, the parameters which control\\nwhich linear layers LoRA should be applied to in the model:\\n\\n* ``lora_attn_modules: List[str]`` accepts a list of strings specifying which layers of the model to apply\\n LoRA to:\\n\\n * ``q_proj`` applies LoRA to the query projection layer.\\n * ``k_proj`` applies LoRA to the key projection layer.\\n * ``v_proj`` applies LoRA to the value projection layer.\\n * ``output_proj`` applies LoRA to the attention output projection layer.\\n\\n Whilst adding more layers to be fine-tuned may improve model accuracy,\\n this will come at the cost of increased memory usage and reduced training speed.\\n\\n* ``apply_lora_to_mlp: Bool`` applies LoRA to the MLP in each transformer layer.\\n* ``apply_lora_to_output: Bool`` applies LoRA to the model\\'s final output projection.\\n This is usually a projection to vocabulary space (e.g. in language models), but\\n other modelling tasks may have different projections - classifier models will project\\n to the number of classes, for example\\n\\n.. note::\\n\\n Models which use tied embeddings (such as Gemma and Qwen2 1.5B and 0.5B) for the\\n final output projection do not support ``apply_lora_to_output``.\\n\\nThese are all specified under the ``model`` flag or config entry, i.e:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"output_proj\"]\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.llama3.lora_llama3_8b\\n apply_lora_to_mlp: True\\n model.lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\",\"output_proj\"]\\n\\nSecondly, parameters which control the scale of the impact of LoRA on the model:\\n\\n* ``lora_rank: int`` affects the scale of\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:cbc88\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:9dcb7\\nContent: ora_finetune_label>`.\\nFor more on QLoRA in torchtune, see our :ref:`QLoRA Tutorial `.\\n\\nLet\\'s take a look at how we can fine-tune Llama3-8B-Instruct with LoRA on a single device using torchtune. In this example, we will fine-tune\\nfor one epoch on a common instruct dataset for illustrative purposes. The basic command for a single-device LoRA fine-tune is\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\\n\\n.. note::\\n To see a full list of recipes and their corresponding configs, simply run ``tune ls`` from the command line.\\n\\nWe can also add :ref:`command-line overrides ` as needed, e.g.\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n checkpointer.checkpoint_dir= \\\\\\n tokenizer.path=/tokenizer.model \\\\\\n checkpointer.output_dir=\\n\\nThis will load the Llama3-8B-Instruct checkpoint and tokenizer from ```` used in the :ref:`tune download ` command above,\\nthen save a final checkpoint in the same directory following the original format. For more details on the\\ncheckpoint formats supported in torchtune, see our :ref:`checkpointing deep-dive `.\\n\\n.. note::\\n To see the full set of configurable parameters for this (and other) configs we can use :ref:`tune cp ` to copy (and modify)\\n the default config. :ref:`tune cp ` can be used with recipe scripts too, in case you want to make more custom changes\\n that cannot be achieved by directly modifying existing configurable parameters. For more on :ref:`tune cp ` see the section on\\n :ref:`modifying configs ` in our \":ref:`finetune_llama_label`\" tutorial.\\n\\nOnce training is complete, the model checkpoints will be saved and their locations will be logged. For\\nLoRA fine-tuning, the final checkpoint will contain the merged weights, and a copy of just the (much smaller) LoRA weights\\nwill\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA, you can follow these steps:\n\n1.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Install the necessary packages, including torchtune and the L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama2 model.\n2. Load the Llama2 model", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and specify which layers to apply LoRA to.\n3. ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Define the LoRA parameters, such as the rank and alpha values", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".\n4. 
Train the model using the LoRA fine-t", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "uning recipe in torchtune.\n\nHere is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " an example of how to use", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA with the Llama2 model", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ":\n\n```python\nfrom torchtune.models.llama2 import", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " llama2_7b, lora_llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2_7b\n\n# Build Llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2 without any LoRA layers\nbase_model = llama2_", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "7b()\n\n# The default settings for lora_llama2", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_7b will match those for llama2_7", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "b\n# We just need to define which layers we want", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA applied to.\n# Within each self", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-attention, we can choose from [\"q_proj\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "k_proj\", \"v_proj\", and \"output_proj", - "type": "text" - }, - "event_type": { - 
"__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\"].\n# We can also set apply_lora_to_m", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lp=True or apply_lora_to_output=True", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to apply LoRA to other", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " linear\n# layers outside of the self-attention.\nl", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ora_model = lora_llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2_7b(lora_at", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "tn_modules=[\"q_proj\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "v_proj\"])\n\n# Print the first", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " layer's self-attention in the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " usual Llama2 model\nprint", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(base_model.layers[0].attn)\n# Print the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " same for Llama2 with LoRA", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " weights\nprint(lora_model.layers[0].attn", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { 
- "delta": { - "text": ")\n```\n\nThis code will load the Llama2 model", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and apply LoRA to the specified layers. The `l", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ora_attn_modules` parameter is used to specify which layers", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to apply LoRA to, and the `apply_lora", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_to_mlp` and `apply_lora_to_output`", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " parameters can be used to apply LoRA", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to other linear layers outside of the self", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-attention.\n\nYou can also use the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `tune run` command to fine-tune the model", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " using the LoRA fine-tuning recipe in torchtune.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " For example:\n\n```bash\ntune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " run lora_finetune_single_device --config llama3", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/8B_lora_single_device\n``", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": 
"`\n\nThis will run the LoRA fine-tuning recipe on the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama3-8B-Instruct model using the default configuration", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". You can modify the configuration by adding command-line overrides, such", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " as:\n\n```bash\ntune run", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "out_of_tokens" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. 
note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='You can use the following function call to answer the user\\'s question:\\n\\n{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\": {\"query\": \"How to fine-tune a Llama2 model with LoRA in torchtune\"}}', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "name\": \"knowledge_search\", \"parameters\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " {\"query\": \"How to use LoRA\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "How to use LoRA" + "metric": "completion_tokens", + "span_id": "tBuntiC1", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:54.993758+00:00", + "__module__": "datetime" }, - "call_id": "64448cc3-c11a-4bae-bdcc-e5b8d13b888f", - "tool_name": "knowledge_search" + "trace_id": "5SueXj79Q2e5n37g", + "type": "metric", + "unit": "tokens", + "value": 10 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f4fd3\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. 
Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. 
When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:8892b\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. 
_glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "You", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " can use the following function call to answer", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the user's question:\n\n{\"type\": \"function\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "name\": \"knowledge_search\", \"parameters\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " {\"query\": \"How to fine-tune a Llama2", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " model with LoRA in torchtune\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\", \"parameters\": {\"query\": \"Torchtune", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " documentation\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Torchtune documentation" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "0f0eb27a-1126-4d26-8b33-b630a9518093", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), 
CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. 
If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " attention type used by Llama3-8B is grouped", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "-query attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. 
You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. 
note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " attention type used by Llama3-", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "8B is grouped-query attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"type\": \"function\",\n \"name\": \"knowledge", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_search\",\n \"parameters\": {\n \"query\": \"L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama3-8B attention type\"\n }\n}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Llama3-8B attention type" + "metric": "total_tokens", + "span_id": "tBuntiC1", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:54.993761+00:00", + "__module__": "datetime" }, - "call_id": "ce62cb6d-fcb0-437a-abd9-b0bed88628ed", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "5SueXj79Q2e5n37g", + "type": "metric", + "unit": "tokens", + "value": 53 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" }, - "tool_call": "", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + 
"__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100\u00b0C.", + "type": "text" }, - "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\",", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": " \"parameters\": {\"query\": \"L", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" }, - "tool_call": "lama3-8B attention type\"}}", - "type": "tool_call" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Llama3-8B attention type" + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "25fcc4f2-72a8-4175-82ca-c7a692d13d66", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, 
tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. 
Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\\\u201cloved\\\\u201d an image on Facebook known as \\\\\"Challah Horse\\\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " current CEO of Meta is Mark Zuckerberg.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', 
context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. 
Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}, {\"title\": \"Meta - Leadership & Governance\", \"url\": \"https://investor.atmeta.com/leadership-and-governance/\", \"content\": \"Mr. Andreessen was a co-founder of Netscape Communications Corporation, a software company, serving in various positions, including Chief Technology Officer and Executive Vice President of Products. Ms. Killefer also served as Assistant Secretary for Management, Chief Financial Officer, and Chief Operating Officer of the U.S. Department of the Treasury from 1997 to 2000 and as a member of the IRS Oversight Board from 2000 to 2005, including as Chair of the IRS Oversight Board from 2002 to 2004. Ms. Travis has served as Executive Vice President and Chief Financial Officer of The Estee Lauder Companies Inc., a global manufacturer and marketer of skin care, makeup, fragrance and hair care products, since August 2012.\", \"score\": 0.6175132, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " current CEO of Meta is Mark Zuckerberg", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', 
content='Search the web and tell me who the current CEO of Meta is.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Zuckerberg\\'s political pivot targets Apple, puts Meta staffers on edge\", \"url\": \"https://www.cnbc.com/2025/02/14/zuckerbergs-rightward-policy-shift-hits-meta-staffers-targets-apple.html\", \"content\": \"Meta CEO Mark Zuckerberg\\'s actions to curry favor with the president have rattled employees, but people familiar with his efforts say there\\'s a clear strategy.\", \"score\": 0.77179235, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. 
Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " current CEO of Meta is Mark Zuckerberg.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": 
"ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "brave_search.call(query=\"current", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " CEO of Meta\")", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "current CEO of Meta" + "metric": "prompt_tokens", + "span_id": "03QQgo3b", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:34.636678+00:00", + "__module__": "datetime" }, - "call_id": "f5d644f1-3ada-4a5a-a088-736c89428fe9", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "brave_search" - } + "trace_id": "mE4SuRfcQUOcOyP2", + "type": "metric", + "unit": "tokens", + "value": 85 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - 
"metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point`", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is not able to find the boiling point of polyjuice as", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " it is a fictional liquid from the Harry Potter series.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point` is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not able to find the boiling point of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - 
}, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " polyjuice as it is a fictional", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " liquid from the Harry Potter series. The function is only able", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to find the boiling point of real liquids.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point` is not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " able to find the boiling point of poly", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": 
"progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "juice as it is not a real liquid. Polyjuice", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is a magical potion from the Harry Potter", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " series.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point`", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is not able to find the boiling point of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - 
"stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " polyjuice as it is not a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " real liquid.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point` is not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " able to find the boiling point of polyjuice as it is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not a real liquid. 
Polyjuice is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a magical potion from the Harry Potter series.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice'})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function `get_boiling_point` is not able", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to find the boiling point of polyju", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ice as it is not a real", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " liquid.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - 
"type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get_bo", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "iling_point\", \"parameters\": {\"liquid", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_name\": \"polyjuice\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "490c45b2-2a13-4ee1-9e37-711fabdbcc88", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - 
"type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\", \"name\": \"get_boiling_point\",", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " \"parameters\": {\"liquid_name\": \"", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "polyjuice\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + "metric": "completion_tokens", + "span_id": "03QQgo3b", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:34.636767+00:00", + 
"__module__": "datetime" }, - "call_id": "22050f4b-36df-48fb-ac11-e3a47fa0beaf", - "tool_name": "get_boiling_point" + "trace_id": "mE4SuRfcQUOcOyP2", + "type": "metric", + "unit": "tokens", + "value": 22 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_boiling_point\", \"parameters\": {\"liquid_name\": \"polyjuice", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - 
"metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "b5f6f475-f1ed-4916-9959-405e72ca0c1d", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": \"get_boiling_point\", \"parameters", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": {\"liquid_name\": 
\"polyjuice\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + "metric": "total_tokens", + "span_id": "03QQgo3b", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:34.636773+00:00", + "__module__": "datetime" }, - "call_id": "11302682-7a3a-45f3-955b-6709444fd626", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "mE4SuRfcQUOcOyP2", + "type": "metric", + "unit": "tokens", + "value": 107 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point_with_metadata\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point_with_metadata\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": 
{\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " couldn't find any information on the boiling point of Polyjuice", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Polyjuice is a magical potion in", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the Harry Potter series that allows the drinker to transform into", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " someone else. It's not a physical substance with a boiling point", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". 
If you have any other questions, I'd be happy to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " help.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " couldn't find any information on the boiling point", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " of Polyjuice. Polyjuice is a magical potion in the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Harry Potter series that allows the drinker to transform into someone else. It's", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not a physical substance with a boiling point. 
If", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you have any other questions, I'd be", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " happy to help.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='str', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": "", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" }, - "tool_call": "{\"type\": \"function\", \"name\": \"", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - 
"value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100\u00b0C.", + "type": "text" }, - "tool_call": "get_boiling_point\", \"parameters\": {\"liquid_name\": \"", - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": "polyjuice\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "3e1a2cdc-46c3-4f2f-9fca-874fdea1700c", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', 
description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "_point\", \"parameters\": {\"liquid_name", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": \"polyjuice\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "liquid_name": "polyjuice" + "metric": "prompt_tokens", + "span_id": "vzNuoz4e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.792508+00:00", + "__module__": "datetime" }, - "call_id": "e704d0f9-45a1-4ed1-90b0-8a05c504da6c", - "tool_name": "get_boiling_point" + "trace_id": "vNRMmadcTVmfkn5-", + "type": "metric", + "unit": "tokens", + "value": 87 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 
1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " 100th prime number is 541.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "def is_prime(n):\n if n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - 
"metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " <= 1:\n return False\n if n <= 3:\n return", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " True\n if n % 2 == 0 or n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " % 3 == 0:\n ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " return False\n i = 5\n while i *", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " i <= n:\n if n % i", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " == 0 or n % (i + 2", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ") == 0:\n return False", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\n i += 6\n ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " return True\n\ndef get_nth_prime(n):\n count = ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0\n num = 2\n ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": 
{ - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " while True:\n if is_prime(num):\n count += ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "1\n if count == n:\n return num\n ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " num += 1\n\nprint(get_nth_prime(100))", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef get_nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(get_nth_prime(100))" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "6d57c323-7679-447f-9928-ccab76c0bdc9", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative 
trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Per", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "plexity the company was founded in 202", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of 
knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Per", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "plexity the company was founded in 2022.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"knowledge_search\", \"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "parameters\": {\"query\": \"Perplexity company founding date\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Perplexity company founding date" + "metric": "completion_tokens", + "span_id": "vzNuoz4e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.792536+00:00", + "__module__": "datetime" }, - "call_id": "22d5440e-2873-4956-a81f-f114fc78671d", - "tool_name": "knowledge_search" + "trace_id": "vNRMmadcTVmfkn5-", + "type": "metric", + "unit": "tokens", + "value": 22 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a 
quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\": \"knowledge_search\",", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"parameters\": {\"query\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"Perplexity company founding date\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Perplexity company founding date" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "e4a5ff1d-ac00-4e0a-b93b-17e19fa3bc55", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": 
"generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": \"knowledge_search\", \"parameters\": {\"", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "query\": \"Perplexity company founding date\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Perplexity company founding date" + "metric": "total_tokens", + "span_id": "vzNuoz4e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.792544+00:00", + "__module__": "datetime" }, - "call_id": "98d3790b-1b84-4ab7-ad66-117fea68d5db", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": 
"complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "vNRMmadcTVmfkn5-", + "type": "metric", + "unit": "tokens", + "value": 109 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", 
- "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\",", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " \"parameters\": {\"query\": \"Perplexity company founding", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " date\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Perplexity company founding date" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" }, - "call_id": "6add8292-f388-4ec5-8ec5-5071c9397492", - "tool_name": "knowledge_search" + "tool_call": "", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'NBA creation date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 
2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " NBA was created on August ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "3, 1949, with", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the merger of the Basketball Association of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " America (BAA) and the National Basketball League", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " (NBL).", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - 
"__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'NBA creation date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " NBA was created on August 3, ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "1949, with the merger of the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Basketball Association of America (BAA) and the National Basketball", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " League (NBL).", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'when was the nba created'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), 
CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'when was the nba created'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " NBA was created on August 3, 1949,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " with the merger of the Basketball Association of America", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " (BAA) and the National Basketball", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " League (NBL).", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'when was the nba created'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: 
Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "{\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "type\": \"function\", \"name\":", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " \"knowledge_search\", \"parameters\": {\"query", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\": \"when was the nba created\"}}", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "when was the nba created" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "c132966d-e4be-47de-9512-7e9e2e6d896c", - "tool_name": "knowledge_search" + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point\",", + "type": "tool_call" }, - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - 
}, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": \"knowledge_search\", \"parameters", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\": {\"query\": \"when was", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " the nba created\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "when was the nba created" + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "0145ecf7-ff15-4e06-8684-d9c60e0e2966", - "tool_name": "knowledge_search" + "tool_call": " \"parameters\": {\"liquid_name\": \"polyjuice\", \"celci", + "type": "tool_call" }, - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.1-8B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\", \"parameters\": {\"query\": \"NBA creation date\"}}", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "NBA creation date" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "f50656dd-201d-44b0-8f9f-ca88b970b3fd", - "tool_name": "knowledge_search" + "tool_call": "us\": \"true\"}}", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. 
'), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': True})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " provided function definitions are not suitable for", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " this task", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Please re", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "work them to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " align with the task requirements.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant Always respond with tool calls no matter what. 
'), UserMessage(role='user', content='Get the boiling point of polyjuice with a tool call.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "', celcius=True)]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "celcius": true, - "liquid_name": "polyjuice" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" }, - "call_id": "1fc2d874-894e-4857-ae2b-7aacc75c330e", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', 
tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': True})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " function call returned an error", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " since \"polyjuice\" is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not a real liquid. Polyjuice is a fictional substance", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " from the Harry Potter series. 
The boiling", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " point of a liquid is a physical", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " property that can be measured and", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " quantified, but it only applies", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to real substances that exist in the physical world.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Call get_boiling_point and answer What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "', celcius=True)]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, 
- { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": true, - "liquid_name": "polyjuice" + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "98d5962a-eab3-4d83-bca4-d4d6aa54f1dc", + "tool_name": "get_boiling_point" }, - "call_id": "7d72d1ae-9f52-40c7-8dc5-48fff52b253a", - "tool_name": "get_boiling_point" + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Give me a sentence that contains the word: hello', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "When", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " I answered the phone, the friendly", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " voice on the other end said \"hello\" and asked how I was doing", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use 
code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)\\n# Sample of data\\nprint(\"Data sample from file:\")\\nprint(df.head())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file 'bwrap' was not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " found. This is likely because the file path provided is incorrect or the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file does not exist in the specified location.\n\nTo resolve", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " this issue, you should ensure that", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file path is correct and the file exists in the specified location. 
If", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file is located in a different directory, you should", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " provide the correct file path.\n\nAdditionally, you can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use the `os` module to check if the file exists before attempting", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to read it. Here", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'s an example:\n\n```python\nimport os\nimport", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " pandas as pd\n\nfile_path = \"/var/folders", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/rb/qv8vwgyj6yjd3t4p", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "wsy9t0rm0000gn/T/tmp4n_d_h", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "5o/u4yh2j11inflation.csv\"\n\nif", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " os.path.isfile(file_path):\n df =", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " pd.read_csv(file_path)\n print", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(\"Number of rows and columns in the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " data:\", df.shape)\n print(\"Columns of the data are:\", len", - "type": "text" - }, - 
"event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(df.columns))\n print(\"Columns of the data are:\", df.columns)\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " print(\"Datatype of the columns are:\", df.dtypes)\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " print(\"Data sample from file:\")\n print(df.head())\nelse:\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " print(\"The file does not exist\")\n```\n\nThis code checks if", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file exists before attempting to read it. If the file does not exist", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", it prints a message indicating that the file does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv file, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": "", - "type": "tool_call" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null + "metrics": null + } }, { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": "import pandas as pd\n# Load", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" }, - "tool_call": " data\ndf = pd.read_csv(\"/var/folders/rb/qv", - "type": "tool_call" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "8vwgyj6yjd3t4pwsy9t", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0rm0000gn/T/tmp4n_d_h5o/u4", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "yh2j11inflation.csv\")\n# Rows\nprint(\"Number of", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " rows 
and columns in the data:\", df.shape)\n# Columns\nprint(\"", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Columns of the data are:\", len(df.columns))\n# Column names\nprint", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(\"Columns of the data are:\", df.columns)\n# Column dtypes\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "print(\"Datatype of the columns are:\", df.dtypes)\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Sample of data\nprint(\"Data sample from file:\")\nprint(df.head())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp4n_d_h5o/u4yh2j11inflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)\n# Sample of data\nprint(\"Data sample from file:\")\nprint(df.head())" + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "517038eb-c373-441b-96fe-3a0e2f063fc0", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, 
tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics about the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file being moved to a different location.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does exist, you can try to load it using the correct path. If the file does not exist, you will need to create it or obtain it from the relevant source.\\n\\nHere is an example of how you can modify the code to handle this situation:\\n\\n```\\nimport pandas as pd\\n\\n# Define the path to the CSV file\\nfile_path = \"\"\\n\\n# Check if the file exists\\nimport os\\nif os.path.isfile(file_path):\\n # Load the CSV file\\n df = pd.read_csv(file_path)\\n\\n # Print the first few rows of the dataframe\\n print(df.head())\\n\\n # Print information about the dataframe\\n print(df.info())\\n\\n # Print summary statistics about the dataframe\\n print(df.describe())\\nelse:\\n print(\"The file does not exist.\")\\n```\\n\\nThis code will check if the file exists before attempting to load it. If the file does not exist, it will print a message indicating that the file does not exist.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": 
"text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmpbb210725/duWDtjG", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ninflation.csv\" does not exist. This could be due to a number", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " of reasons such as the file being deleted, the path being incorrect, or", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file being moved to a different location.\n\nTo resolve this issue, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " should ensure that the file exists and the path is correct. If the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does exist, you can try to load it using the correct path. 
If", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file does not exist, you will need to create it or obtain it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " from the relevant source.\n\nHere is an example of how you can modify the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " code to handle this situation:\n\n```\nimport pandas as pd\nimport matplotlib", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".pyplot as plt\n\n# Define the path to the CSV file\nfile_path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " = \"/var/folders/rb/qv8vwgyj6y", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "jd3t4pwsy9t0rm0000gn/T", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/tmpbb210725/duWDtjGninflation.csv\"\n\n#", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Check if the file exists\nimport os\nif os.path.isfile(file_path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "):\n # Load the CSV file\n df = pd.read_csv(file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_path)\n\n # Convert the 'Year' column to datetime\n df", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " # Group by 'Year' and calculate the average inflation\n df", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, 
- "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_avg_inflation = df.groupby('Year')['Inflation'].mean().reset", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_index()\n\n # Plot the average yearly inflation as a time series\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.figure(figsize=(10,6))\n plt.plot(df_avg_inflation", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "['Year'], df_avg_inflation['Inflation'], marker='o')\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.title('Average Yearly Inflation')\n plt.xlabel('Year", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "')\n plt.ylabel('Inflation')\n plt.grid(True)\n plt", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".show()\nelse:\n print(\"The file does not exist.\")\n```\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "This code will check if the file exists before attempting to load it. 
If", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file does not exist, it will print a message indicating that the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics about the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file being moved to a different location.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does exist, you can try to load it using the correct path. If the file does not exist, you will need to create it or obtain it from the relevant source.\\n\\nHere is an example of how you can modify the code to handle this situation:\\n\\n```\\nimport pandas as pd\\n\\n# Define the path to the CSV file\\nfile_path = \"\"\\n\\n# Check if the file exists\\nimport os\\nif os.path.isfile(file_path):\\n # Load the CSV file\\n df = pd.read_csv(file_path)\\n\\n # Print the first few rows of the dataframe\\n print(df.head())\\n\\n # Print information about the dataframe\\n print(df.info())\\n\\n # Print summary statistics about the dataframe\\n print(df.describe())\\nelse:\\n print(\"The file does not exist.\")\\n```\\n\\nThis code will check if the file exists before attempting to load it. 
If the file does not exist, it will print a message indicating that the file does not exist.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " file\ndf = pd.read_csv(\"/var/folders/rb/qv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "8vwgyj6yjd3t4pwsy9t", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0rm0000gn/T/tmpbb210725/duWDtj", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Gninflation.csv\")\n\n# Convert the 'Year' column to datetime\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "df['Year'] = pd.to_datetime(df['Year'], format", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - 
"event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "='%Y')\n\n# Group by 'Year' and calculate", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " the average inflation\ndf_avg_inflation = df.groupby('", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Year')['Inflation'].mean().reset_index()\n\n# Plot the average yearly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " inflation as a time series\nplt.figure(figsize=(10", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ",6))\nplt.plot(df_avg_inflation['Year'], df_avg_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation['Inflation'], marker='o')\nplt.title('Average Yearly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmpbb210725/duWDtjGninflation.csv\")\n\n# Convert the 'Year' column to datetime\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n# Group by 'Year' and calculate the average inflation\ndf_avg_inflation = 
df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average yearly inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt.grid(True)\nplt.show()" + "metric": "prompt_tokens", + "span_id": "1A0bWgLL", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:24.102366+00:00", + "__module__": "datetime" }, - "call_id": "a6646608-a943-4849-884e-1852d5ef4a7e", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "trace_id": "4a5HMcM9R3uWB4Cv", + "type": "metric", + "unit": "tokens", + "value": 37 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file being in a different location.\\n\\nTo resolve this issue, you can try the following:\\n\\n1. Check the file path: Make sure the file path is correct and the file exists in the specified location.\\n2. Use a relative path: If the file is in the same directory as your Python script, you can use a relative path instead of an absolute path.\\n3. Check file permissions: Make sure you have the necessary permissions to read the file.\\n4. 
Use a try-except block: You can use a try-except block to catch the FileNotFoundError and handle it accordingly.\\n\\nHere is an example of how you can modify the code to handle the FileNotFoundError:\\n\\n```\\nimport pandas as pd\\n\\ntry:\\n df = pd.read_csv(\"\")\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nexcept FileNotFoundError:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will print \"The file does not exist\" if the file is not found, instead of raising an error.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/rb/qv8vwgyj6y", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "jd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmpdcpkc9", - "type": "text" - }, - "event_type": { - "__enum__": 
"ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_f/FKWQnYoVinflation.csv\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does not exist. This could be due to a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " number of reasons such as the file being deleted,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the path being incorrect, or the file being in", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a different location.\n\nTo resolve this issue, you can try the following:\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "1. Check the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path: Make sure the file path is correct and", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file exists in the specified location.\n2. Use a relative path:", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " If the file is in the same directory as your Python script, you can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use a relative path instead of an absolute path.\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "3. Check file permissions: Make sure you have the necessary permissions to read", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file.\n4. 
Use a try-except block: You can use", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a try-except block to catch the FileNotFoundError and handle it accordingly.\n\nHere", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " is an example of how you can modify the code to handle the FileNotFoundError:\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "```\nimport pandas as pd\nimport matplotlib.pyplot as plt\n\ntry:\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " df = pd.read_csv(\"/var/folders/rb/q", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "v8vwgyj6yjd3t4pwsy9", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "t0rm0000gn/T/tmpdcpkc9_f/FKW", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "QnYoVinflation", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".csv\")\n df['Year'] = pd.to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_datetime(df['Year'], format='%Y')\n df_avg_inflation =", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " df.groupby('Year')['Inflation'].mean().reset_index()\n plt", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".figure(figsize=(10,6))\n plt.plot(df_avg_inflation['", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Year'], df_avg_inflation['Inflation'], marker='o')\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": 
null - }, - { - "event": { - "delta": { - "text": " plt.title('Average Yearly Inflation')\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.xlabel('Year')\n plt.ylabel('Inflation')\n plt.grid", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(True)\n plt.show()\nexcept FileNotFoundError:\n print(\"The file does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not exist\")\n```\n\nThis code will print \"The file does not exist", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\" if the file is not found, instead of raising an error.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file being in a different location.\\n\\nTo resolve this issue, you can try the following:\\n\\n1. Check the file path: Make sure the file path is correct and the file exists in the specified location.\\n2. Use a relative path: If the file is in the same directory as your Python script, you can use a relative path instead of an absolute path.\\n3. Check file permissions: Make sure you have the necessary permissions to read the file.\\n4. 
Use a try-except block: You can use a try-except block to catch the FileNotFoundError and handle it accordingly.\\n\\nHere is an example of how you can modify the code to handle the FileNotFoundError:\\n\\n```\\nimport pandas as pd\\n\\ntry:\\n df = pd.read_csv(\"\")\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nexcept FileNotFoundError:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will print \"The file does not exist\" if the file is not found, instead of raising an error.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\n\n# Load the CSV file\ndf = pd", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".read_csv(\"/var/folders/rb/qv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "8vwgyj6yjd3t4", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "pwsy9t0rm0000gn", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": 
{ - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/T/tmpdcpkc9_f/FKWQ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "nYoVinflation.csv\")\n\n# Convert the '", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Year' column to datetime\ndf['Year'] = pd", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".to_datetime(df['Year'], format", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "='%Y')\n\n# Group by 'Year' and calculate the average inflation\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation['Inflation'], marker='o')\nplt.title('Average Yearly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Inflation')\nplt.xlabel('Year')\nplt.ylabel", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "('Inflation')\nplt.grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmpdcpkc9_f/FKWQnYoVinflation.csv\")\n\n# Convert the 'Year' column to datetime\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n# Group by 
'Year' and calculate the average inflation\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt.grid(True)\nplt.show()" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "619c3b2c-3e23-485f-85bd-38a5ecf398b2", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file not being accessible.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does not exist, you will need to create it or obtain it from the relevant source. 
If the path is incorrect, you will need to update the path to the correct location of the file.\\n\\nAdditionally, you can use the `os` module to check if the file exists before trying to load it:\\n\\n```\\nimport os\\nimport pandas as pd\\n\\nfile_path = \"\"\\n\\nif os.path.isfile(file_path):\\n df = pd.read_csv(file_path)\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nelse:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will check if the file exists before trying to load it, and will print a message if the file does not exist.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmp5zsm1ywy/RKBk", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": 
"Al1zinflation.csv\" does not exist. This could be due to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a number of reasons such as the file being deleted, the path being incorrect", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", or the file not being accessible.\n\nTo resolve this issue, you should", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " ensure that the file exists and the path is correct. If the file does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not exist, you will need to create it or obtain it from the relevant", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " source. If the path is incorrect, you will need to update the path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to the correct location of the file.\n\nAdditionally, you can use the `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "os` module to check if the file exists before trying to load it:\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "```\nimport os\nimport pandas as pd\nimport", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " matplotlib.pyplot as plt\n\nfile_path = \"/var/folders/rb/q", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "v8vwgyj6yjd3t4pwsy9", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "t0rm0000gn/T/tmp5zsm1ywy/R", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "KBkAl1zinflation.csv\"\n\nif os.path.isfile(file_path):\n", - "type": "text" - }, - 
"event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " df = pd.read_csv(file_path)\n df['", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Year'] = pd.to_datetime(df['Year'], format='%Y')\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " df_avg_inflation = df.groupby('Year')['Inflation'].mean().", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "reset_index()\n plt.figure(figsize=(10,6))\n plt.plot", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(df_avg_inflation['Year'], df_avg_inflation['Inflation'],", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " marker='o')\n plt.title('Average Yearly Inflation')\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.xlabel('Year')\n plt.ylabel('Inflation')\n plt.grid", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(True)\n plt.show()\nelse:\n print(\"The file does not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " exist\")\n```\n\nThis code will check if the file exists before trying to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " load it, and will print a message if the file does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), 
CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file not being accessible.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does not exist, you will need to create it or obtain it from the relevant source. If the path is incorrect, you will need to update the path to the correct location of the file.\\n\\nAdditionally, you can use the `os` module to check if the file exists before trying to load it:\\n\\n```\\nimport os\\nimport pandas as pd\\n\\nfile_path = \"\"\\n\\nif os.path.isfile(file_path):\\n df = pd.read_csv(file_path)\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nelse:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will check if the file exists before trying to load it, and will print a message if the file does not exist.', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " file\ndf = pd.read_csv(\"/var/folders/rb/qv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - 
"stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "8vwgyj6yjd3t4pwsy9t", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0rm0000gn/T/tmp5zsm1ywy/RKB", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "kAl1zinflation.csv\")\n\n# Convert the 'Year'", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " column to datetime\ndf['Year'] = pd.to_datetime(df['Year", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "'], format='%Y')\n\n# Group by", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " 'Year' and calculate the average inflation\ndf_avg_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "# Plot the average inflation as a time series\nplt.figure(figsize=(10", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ",6))\nplt.plot(df_avg_inflation['Year'], df_avg_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": 
"flation['Inflation'], marker='o')\nplt.title('Average Yearly", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp5zsm1ywy/RKBkAl1zinflation.csv\")\n\n# Convert the 'Year' column to datetime\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n# Group by 'Year' and calculate the average inflation\ndf_avg_inflation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt.grid(True)\nplt.show()" + "metric": "completion_tokens", + "span_id": "1A0bWgLL", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:24.102404+00:00", + "__module__": "datetime" }, - "call_id": "61b988d6-45f4-4147-8b62-69c3abbb03a9", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "trace_id": "4a5HMcM9R3uWB4Cv", + "type": "metric", + "unit": "tokens", + "value": 10 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 
'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file not being accessible.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does not exist, you will need to create it or obtain it from the relevant source. If the path is incorrect, you will need to update the path to the correct location of the file.\\n\\nAdditionally, you can use the `os` module to check if the file exists before trying to read it. Here is an example:\\n\\n```\\nimport os\\nimport pandas as pd\\n\\nfile_path = \"\"\\n\\nif os.path.isfile(file_path):\\n df = pd.read_csv(file_path)\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nelse:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will check if the file exists before trying to read it. If the file does not exist, it will print \"The file does not exist\".', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - 
}, - { - "event": { - "delta": { - "text": "vwgyj6y", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "jd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmp1ugde3u9/FSj", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "wY288inflation.csv\" does not exist. This could be due", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to a number of reasons such as the file being deleted, the path being", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " incorrect, or the file not being accessible.\n\nTo resolve this issue, you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " should ensure that the file exists and the path is correct. If the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does not exist, you will need to create it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " or obtain it from the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " relevant source. 
If the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path is incorrect, you will need to update the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path to the correct location of the file.\n\nAdditionally", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", you can use the `os` module to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " check if the file exists before trying to read it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Here is an example:\n\n```\nimport os", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\nimport pandas as pd\nimport matplotlib.pyplot as plt\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "file_path = \"/var/folders/rb/qv8vwgyj", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "6yjd3t4pwsy9t0rm0000", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "gn/T/tmp1ugde3u9/FSjwY288", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "inflation.csv\"\n\nif os.path.isfile(file_path):\n df = pd", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".read_csv(file_path)\n df['Year'] = pd.to_datetime(df", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "['Year'], format='%Y')\n df_avg", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_inflation = df.groupby('Year')['Inflation", - 
"type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'].mean().reset_index()\n plt.figure(figsize=(10,6))\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.plot(df_avg_inflation['Year'], df_avg_inflation['", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Inflation'], marker='o')\n plt.title('Average Yearly In", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "flation')\n plt.xlabel('Year')\n plt", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".ylabel('Inflation')\n plt.grid(True)\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " plt.show()\nelse:\n print(\"The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file does not exist\")\n```\n\nThis code will", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " check if the file exists before trying to read it. 
If the file does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not exist, it will print \"The file does not exist\".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\"), CompletionMessage(role='assistant', content='The error message indicates that the file \"\" does not exist. This could be due to a number of reasons such as the file being deleted, the path being incorrect, or the file not being accessible.\\n\\nTo resolve this issue, you should ensure that the file exists and the path is correct. If the file does not exist, you will need to create it or obtain it from the relevant source. If the path is incorrect, you will need to update the path to the correct location of the file.\\n\\nAdditionally, you can use the `os` module to check if the file exists before trying to read it. Here is an example:\\n\\n```\\nimport os\\nimport pandas as pd\\n\\nfile_path = \"\"\\n\\nif os.path.isfile(file_path):\\n df = pd.read_csv(file_path)\\n print(df.head())\\n print(df.info())\\n print(df.describe())\\nelse:\\n print(\"The file does not exist\")\\n```\\n\\nThis code will check if the file exists before trying to read it. 
If the file does not exist, it will print \"The file does not exist\".', stop_reason=, tool_calls=[]), UserMessage(role='user', content='Plot average yearly inflation as a time series', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " file\ndf = pd.read_csv(\"/var/folders/rb/qv", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "8vwgyj6yjd3t4pwsy9t", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "0rm0000gn/T/tmp1ugde3u9/FS", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "jwY288inflation.csv\")\n\n# Convert the 'Year' column", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " to datetime\ndf['Year'] = pd.to_datetime(df['Year'],", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - 
"parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " format='%Y')\n\n# Group by", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " 'Year' and calculate the average inflation\ndf_avg_in", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "flation = df.groupby('Year')['Inflation'].mean().reset_index()\n\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "# Plot the average yearly inflation as a time series\n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "plt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "Year'], df_avg_inflation['Inflation'], marker='o')\nplt", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ".title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "('Inflation')\nplt.grid(True)\nplt.show()", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmp1ugde3u9/FSjwY288inflation.csv\")\n\n# Convert the 'Year' column to datetime\ndf['Year'] = pd.to_datetime(df['Year'], format='%Y')\n\n# Group by 'Year' and calculate the average inflation\ndf_avg_inflation = 
df.groupby('Year')['Inflation'].mean().reset_index()\n\n# Plot the average yearly inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(df_avg_inflation['Year'], df_avg_inflation['Inflation'], marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Inflation')\nplt.grid(True)\nplt.show()" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "da5760dd-614a-4c19-954c-b4e354e75d79", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics about the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4pwsy9t0", - "type": "text" - }, - "event_type": { 
- "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmpbb210725/duWDtjG", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ninflation.csv\" does not exist. This could be", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " due to a number of reasons such as the file being deleted,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the path being", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " incorrect, or the file being moved to a different location.\n\nTo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " resolve this issue, you should ensure that", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file exists and the path is correct. If the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file does exist, you can try to load", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " it using the correct path. 
If the file does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not exist, you will need to create it or obtain", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " it from the relevant", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " source.\n\nHere is an example of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " how you can modify the code to handle this situation:\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "```\nimport pandas as pd\n\n# Define the path to the CSV file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\nfile_path = \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmpbb210725/duWDtjG", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ninflation.csv\"\n\n# Check if the file exists\nimport os", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\nif os.path.isfile(file_path):\n # Load", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the CSV file\n df = pd.read_csv(file_path)\n\n ", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " # Print the first few rows of the dataframe\n print(df.head())\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " # Print information about", - 
"type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the dataframe\n print(df.info())\n\n # Print summary statistics about the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " dataframe\n print(df.describe())\nelse:\n print(\"The file does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not exist.\")\n```\n\nThis code will check if the file exists before", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " attempting to load it. If the file does not exist, it will print", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a message indicating that the file does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - 
"delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4pwsy9t0", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "rm0000gn/T/tmp5zsm1ywy/RKBk", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Al1zinflation.csv\" does not exist. This could be", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " due to a number of reasons such as the file being deleted, the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path being incorrect, or the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not being accessible.\n\nTo resolve this issue, you should ensure", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " that the file exists and the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path is correct. 
If the file does not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " exist, you will need to create it or obtain it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " from the relevant", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " source. If the path is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " incorrect, you will need to update the path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " to the correct location of the file.\n\nAdditionally,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you can use the `os` module to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " check if the file exists before trying to load it:\n\n``", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "`\nimport os\nimport pandas as pd\n\nfile_path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " = \"/var/folders", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/rb/qv8vwgyj6y", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "jd3t4pwsy9t0rm0000gn/T", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "/tmp5zsm1ywy/RKBkAl1zinflation.csv", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\"\n\nif os.path.isfile(file_path):\n df =", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - 
"value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " pd.read_csv(file_path)\n print(df.head())\n print", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(df.info())\n print(df.describe())\nelse:\n print(\"The file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does not exist\")\n```\n\nThis code will check if the file exists before", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " trying to load it, and will print a message if", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the file does not exist.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')]), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": 
null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " error message indicates that the file \"/var/folders/rb/qv8", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "vwgyj6yjd3t4p", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "wsy9t0rm0000gn/T/tmpdcpkc9", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_f/FKWQnYoVinflation.csv\"", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " does not exist. This could be", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " due to a number of reasons such as the file being deleted, the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " path being incorrect, or the file", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " being in a different location.\n\nTo resolve this issue, you can try", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the following:\n\n1. Check the file path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ": Make sure the file path is correct and the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file exists in the specified location.\n2. 
Use a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " relative path", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ": If the file is in the same directory as", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " your Python script, you can use", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a relative path instead of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " an absolute path.\n3. Check file permissions", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ": Make sure you have the necessary permissions to read the file.\n4.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Use a try-except block: You can use a try-except", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " block to catch the FileNotFoundError and handle it accordingly.\n\nHere is an example of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " how you can modify the code to handle the FileNotFoundError:\n\n```\nimport pandas", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " as pd\n\ntry:\n df =", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " pd.read_csv(\"/var/folders/rb/qv8vwgyj", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "6yjd3t", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "4pwsy9t0rm0000", - "type": "text" - }, - 
"event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "gn/T/tmpdcpkc9_f/FKWQnYoVinflation", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".csv\")\n print(df.head())\n print(df.info())\n print(df", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".describe())\nexcept FileNotFoundError:\n print(\"The file does not exist\")\n``", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "`\n\nThis code will print \"The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " file does not exist\" if the file is not found, instead", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " of raising an error.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Here is a csv, can you describe it?', context=None), ToolResponseMessage(role='tool', call_id='', tool_name=, content=[TextContentItem(type='text', text='# User provided a file accessible to you at \"\"\\nYou can use code_interpreter to load and inspect it.')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" 
- }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "import pandas as pd\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "/rb/qv8vwgyj6yjd3t4pwsy9t0", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "rm0000gn/T/tmpbb210725/duWDtjGninflation.csv\")\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Print the first few rows of the dataframe\nprint(df.head())\n\n#", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " Print information about", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " the dataframe\nprint(df.info())\n\n# Print summary statistics about the", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " dataframe\nprint(df.describe())", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "import pandas as pd\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/rb/qv8vwgyj6yjd3t4pwsy9t0rm0000gn/T/tmpbb210725/duWDtjGninflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print information about the dataframe\nprint(df.info())\n\n# Print summary statistics about the dataframe\nprint(df.describe())" + "metric": "total_tokens", + "span_id": "1A0bWgLL", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:24.102411+00:00", + "__module__": "datetime" }, - "call_id": "3ab348fd-a9b8-47d7-be10-7d38159c9a0d", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } - }, - "type": 
"tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "trace_id": "4a5HMcM9R3uWB4Cv", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } } ], "type": "generator" }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:3e3a0\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:fd0f6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help. What's your first question about Torchtune?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'using LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:7da0c\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:7da0c\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:7da0c\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}": { "chunks": [ { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you can follow", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " these steps:\n\n1. Import the necessary modules: `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "from torchtune.models.llama2 import llama2_7b", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", lora_llama2_7b`\n2. 
Create a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 model with LoRA: `lora", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_model = lora_llama2_7b(lora_attn_modules", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "=[\"q_proj\", \"v_proj\"])`\n3. Load the pre-trained", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 weights into the LoRA", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " model: `lora_model.load_state_dict(base_model.state_dict(), strict", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "=False)`\n4. Set only LoRA parameters to trainable:", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " `from torchtune.modules.peft.peft_utils import get", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_adapter_params, set_trainable_params`\n5. 
Run the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA finetune using torchtune's Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA recipe: `tune run --", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nnodes 1 --nproc_per_node 2 lora_finet", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "une_distributed --config llama2/7B_lora`\n\nYou can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " also experiment with different LoRA configurations, such as applying Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA to all linear layers in the self-attention, increasing", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the rank, and scaling alpha and rank together.\n\nNote: You", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " need to have the Llama2 weights and tokenizer downloaded", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and installed, and you need to have the necessary dependencies", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " installed, including torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and PyTorch.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - 
"('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:3e3a0\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:fd0f6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help. What's your first question about Torchtune?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"using LoRA in Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "tool_call": { - "arguments": { - "query": "using LoRA in Torchtune" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" }, - "call_id": 
"62b19206-ed9f-42d1-a614-1582d8598193", - "tool_name": "knowledge_search" + "tool_call": "", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:3e3a0\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:fd0f6\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help. 
What", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'s your first question about Torchtune?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f76dc\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:de2d4\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help. What's your first question about Torchtune?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'using LoRA in Torchtune'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text=\"Result 1:\\nDocument_id:c4fc3\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet\\'s inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer\\'s self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:c4fc3\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 4:\\nDocument_id:c4fc3\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune\\'s LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we\\'ve loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\"\"\"\\n {total_params} total params,\\n {trainable_params}\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \"\"\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune\\'s `LoRA recipe , tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "To", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " use LoRA in Torchtune, you can follow", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " these steps:\n\n1. Import the necessary modules: `", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "from torch", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "tune.models.llama2 import llama2_7b", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ", lora_llama2_7b`\n2. Create a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 model with LoRA: `lora", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_model = lora_ll", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "ama2_7b", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "(lora_attn_modules=[\"q_proj\", \"v_proj\"])`\n3", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ". Load the pre-trained Llama2 weights into", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the LoRA model: `lora_model.load_state", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_dict(base_model.state_dict(), strict=False)`\n4. 
Set only Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA parameters to trainable: `from torchtune.modules.peft", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".peft_utils import get_adapter_params,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " set_trainable_params`\n5. Run the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " LoRA finetune using torchtune's Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA recipe: `tune run --", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nnodes 1 --nproc_per_node 2 lora_finet", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "une_distributed --config llama2/7B_lora`\n\nYou can", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " also experiment with different LoRA configurations, such as applying Lo", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "RA to all linear layers in the self-attention, increasing the rank,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " and scaling alpha and rank together.\n\n", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Note: You need to have the pre-trained", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Llama2 weights and tokenizer downloaded and installed before running the LoRA fin", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - 
"delta": { - "text": "etune. Additionally, you can use torch", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "tune's `WandBLogger` to generate", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " loss curves and track the experiment's", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " progress.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f76dc\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. 
This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. 
code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:de2d4\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')]), CompletionMessage(role='assistant', content=\"I'm ready to help. What's your first question about Torchtune?\", stop_reason=, tool_calls=[]), UserMessage(role='user', content='Tell me how to use LoRA', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"using LoRA in Torchtune", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "using LoRA in Torchtune" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "8413a252-8372-4061-a4a1-0a1d165dd373", - "tool_name": "knowledge_search" + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point_with", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Torchtune documentation'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:f76dc\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. 
For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\"json\",\\n data_files=\"data/my_data.json\",\\n split=\"train\",\\n conversation_column=\"dialogue\",\\n conversation_style=\"sharegpt\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we\\'re fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral\\'s :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. 
When using an optimizer with momentum,\\nlike `AdamW `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune\\'s factory settings, but we may want to experiment a bit.\\nLet\\'s take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: [\\'q_proj\\', \\'v_proj\\']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:de2d4\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we\\'ve enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. 
_glossary_fsdp2:\\n\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "I", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'m ready to help. What's", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " your first question about Torchtune?", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='I am attaching some documentation for Torchtune. Help me answer questions I will ask next.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"Torchtune documentation\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "Torchtune documentation" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "f21015ed-e70b-4a2b-a038-9335acbe0c53", - "tool_name": "knowledge_search" + "tool_call": "_metadata\", \"parameters\": {\"liquid_name\": \"polyjuice\", \"", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. 
In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. 
code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama3-8B uses grouped-query", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " attention instead of the standard multi-head attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. 
You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. 
note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama3-8B uses grouped-query attention instead of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the standard multi-head attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. 
In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. 
code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama3-8B uses grouped-query", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " attention instead of the standard multi-head attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Llama3-8B attention type'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n'), TextContentItem(type='text', text=\"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. 
You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\"), TextContentItem(type='text', text=\"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\"), TextContentItem(type='text', text='Result 4:\\nDocument_id:num-0\\nContent: \\'m Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let\\'s walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet\\'s say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \"role\": \"system\",\\n \"\\n'), TextContentItem(type='text', text='Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet\\'s take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\\n\\n.. 
note::\\n\\n Calling :func:`lora_llama_2\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "L", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "lama3-8B uses grouped-query attention instead of", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the standard", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " multi-head attention.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='insert_into_memory', description='Insert documents into memory', parameters={}), ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"Llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "3-8B attention type\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "Llama3-8B attention type" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" }, - "call_id": "bf3bf9f9-0e56-4720-a6a9-be8ad9e8dfcb", - "tool_name": "knowledge_search" + "tool_call": "celcius\": \"true\"}}", + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Instead of the standard multi-head attention, what attention type does Llama3-8B use?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"Llama", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "3-8B attention type\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "query": "Llama3-8B attention type" + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" }, - "call_id": "9c9a922f-afd6-4bc8-83ba-28211bb3fd29", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel 
Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\\\u201cloved\\\\u201d an image on Facebook known as \\\\\"Challah Horse\\\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. 
### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " current CEO of Meta is Mark Zuckerberg.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'query': 'current CEO of Meta'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content='{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief 
Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company\\'s position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\\\u201cloved\\\\u201d an image on Facebook known as \\\\\"Challah Horse\\\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. 
### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " current CEO of Meta is Mark Zuckerberg.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Search the web and tell me who the current CEO of Meta is.', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - 
"__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "brave_search.call(query=\"current CEO of Meta\")", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "current CEO of Meta" + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "ee5ac18d-de3b-4985-9e93-545de166d3e2", + "tool_name": "get_boiling_point_with_metadata" }, - "call_id": "2039dce8-afbe-4517-bb4a-43c92dab8cff", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "brave_search" - } + "type": "tool_call" }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': True})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": 
null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " -100\u00b0C.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': True})]), ToolResponseMessage(role='tool', call_id='', tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " boiling point of polyjuice is -100 degrees Celsius.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='get_boiling_point', arguments={'liquid_name': 'polyjuice', 'celcius': True})]), ToolResponseMessage(role='tool', call_id='', 
tool_name='get_boiling_point', content='-100')])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " provided function \"get_boiling_point\" is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not sufficient to", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " answer the question as it does not contain information", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " about the boiling point of \"poly", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "juice\". Polyjuice is not a", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " real liquid and does", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " not have a known boiling point. 
If you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " have any other questions or need", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " information about a different liquid,", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " I would be happy to try and", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " assist you.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice='get_boiling_point', tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_boiling_point(liquid", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "_name='polyjuice', celcius=True)]", - "type": 
"text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" }, - "tool_call": { - "arguments": { - "celcius": true, - "liquid_name": "polyjuice" + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "302993c2-3c56-48cf-8891-afac1f20723e", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)}), ToolDefinition(tool_name=, description='Search the web for information', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": 
null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get_boiling_point(liquid_name", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "='polyjuice', celcius=True)]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": true, - "liquid_name": "polyjuice" + "metric": "prompt_tokens", + "span_id": "dsGyjpUB", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.316534+00:00", + "__module__": "datetime" }, - "call_id": "9544e61b-5e69-427b-b30c-874fdbcf53f7", - "tool_name": "get_boiling_point" + "trace_id": "BO0etAZ6RFmGmLCW", + "type": "metric", + "unit": "tokens", + "value": 37 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Poly", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "juice is a fictional potion from", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the Harry Potter series by J.K. Rowling. 
As it", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'s not a real substance, it doesn", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "'t have a boiling point. Polyjuice Potion is", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a magical concoction that allows the drinker to assume the", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " form and appearance of another person, but it's not", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " a physical substance that can be measured or analyzed in the same", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " way as real-world chemicals.\n\nIf", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " you have any other questions or if there's anything else I can help you", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " with, feel free to ask!", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='What is the boiling point of polyjuice?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='get_boiling_point', description='Returns the boiling point of a liquid in Celcius or Fahrenheit', parameters={'liquid_name': ToolParamDefinition(param_type='string', description='The name of the liquid', required=True, default=None), 'celcius': 
ToolParamDefinition(param_type='bool', description='Whether to return the boiling point in Celcius', required=False, default=True)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "get_boiling_point(liquid_name='polyjuice", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "', celcius=True)]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "celcius": true, - "liquid_name": "polyjuice" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "ce595f0c-86f3-4055-b675-09e00007dc97", - "tool_name": "get_boiling_point" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(nth_prime(100))'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=, system_message_behavior=)), ('tool_prompt_format', ), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', 
required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " 100th prime number is 541", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name=, arguments={'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(nth_prime(100))'})]), ToolResponseMessage(role='tool', call_id='', tool_name=, content=\"error\\n[stdout]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stdout]\\n[stderr]\\n[Errno 2] No such file or directory: 'bwrap'\\n[/stderr]\")])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " 100th prime number is 541", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": 
null - }, - { - "event": { - "delta": { - "text": ".", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='Write code and execute it to find the answer for: What is the 100th prime number?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "started" - }, - "tool_call": "", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "def is_prime(n):\n if n <= 1:\n return False", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "\n if n <=", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " ", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "3:\n return True\n if n % 2 == 0", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " or n % 3 == 0:\n return False\n i", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - 
"value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " = 5\n while i * i <=", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " n:\n if n % i == 0 or n % (i", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " + 2) == 0:\n return False\n i +=", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " 6\n return True\n\ndef nth_prime(n):\n count =", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " 0\n num = 2\n while True:\n if", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": " is_prime(num):\n count += 1\n if count == n", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": ":\n return num\n num += 1\n\nprint(nth_prime", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "in_progress" - }, - "tool_call": "(100))", - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(nth_prime(100))" + "metric": "completion_tokens", + "span_id": 
"dsGyjpUB", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.316569+00:00", + "__module__": "datetime" }, - "call_id": "63d06ce7-5266-4ee8-a620-0e81cf5108a1", - "tool_name": { - "__enum__": "BuiltinTool", - "value": "code_interpreter" - } + "trace_id": "BO0etAZ6RFmGmLCW", + "type": "metric", + "unit": "tokens", + "value": 10 }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'Perplexity the company founding date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "Per", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "plexity the company was founded in 202", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "2.", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was Perplexity the company founded?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. 
Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"Perplexity the company", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " founding date\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "Perplexity the company founding date" + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" }, - "call_id": "3804eaba-07f8-448c-8dd4-8ee14d748a05", - "tool_name": "knowledge_search" - }, - "type": "tool_call" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None), CompletionMessage(role='assistant', content='', stop_reason=, tool_calls=[ToolCall(call_id='', tool_name='knowledge_search', arguments={'query': 'NBA creation date'})]), ToolResponseMessage(role='tool', call_id='', tool_name='knowledge_search', content=[TextContentItem(type='text', text='knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n'), TextContentItem(type='text', text='Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n'), TextContentItem(type='text', text='Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on 
Wall Street.[5]\\n'), TextContentItem(type='text', text='Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n'), TextContentItem(type='text', text='END of knowledge_search tool results.\\n')])])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "The", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " NBA was created on August 3, 1949, with", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " the merger of the Basketball", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": " Association of America (BAA) and the National Basketball League (NBL", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": ").", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null - } - ], - "type": "generator" - }, - "('meta-llama/Llama-3.3-70B-Instruct', [SystemMessage(role='system', content='You are a helpful assistant'), UserMessage(role='user', content='when was the nba created?', context=None)])_[('response_format', None), ('sampling_params', SamplingParams(strategy=TopPSamplingStrategy(type='top_p', temperature=0.0001, top_p=0.9), max_tokens=0, repetition_penalty=1.0)), ('stream', True), ('tool_config', ToolConfig(tool_choice=, tool_prompt_format=None, system_message_behavior=)), ('tool_prompt_format', None), ('tools', [ToolDefinition(tool_name='knowledge_search', description='Search for information in a database.', 
parameters={'query': ToolParamDefinition(param_type='string', description='The query to search for. Can be a natural language sentence or keywords.', required=True, default=None)}), ToolDefinition(tool_name=, description='Execute code', parameters={'code': ToolParamDefinition(param_type='string', description='The code to execute', required=True, default=None)})])]": { - "chunks": [ - { - "event": { - "delta": { - "text": "", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "start" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "[k", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "text": "nowledge_search(query=\"NBA creation date\")]", - "type": "text" - }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": null - }, - "metrics": null - }, - { - "event": { - "delta": { - "parse_status": { - "__enum__": "ToolCallParseStatus", - "value": "succeeded" - }, - "tool_call": { - "arguments": { - "query": "NBA creation date" + "metric": "total_tokens", + "span_id": "dsGyjpUB", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:45.316576+00:00", + "__module__": "datetime" }, - "call_id": "d94006c1-5692-4ada-8f1a-d09ef2d46dab", - "tool_name": "knowledge_search" + "trace_id": "BO0etAZ6RFmGmLCW", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Give me a sentence that contains the word: hello\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": []}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" }, - "type": "tool_call" + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "progress" - }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + 
"metrics": null + } }, { - "event": { - "delta": { - "text": "", - "type": "text" + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "event_type": { - "__enum__": "ChatCompletionResponseEventType", - "value": "complete" + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " customer smiled and said \"hello\" to the friendly store clerk.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null }, - "logprobs": null, - "stop_reason": { - "__enum__": "StopReason", - "value": "end_of_turn" - } - }, - "metrics": null + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "R9a1QHt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:19.586300+00:00", + "__module__": "datetime" + }, + "trace_id": "t-ZRvSMzTCudL6SB", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "R9a1QHt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:19.586359+00:00", + "__module__": "datetime" + }, + "trace_id": "t-ZRvSMzTCudL6SB", + "type": "metric", + "unit": "tokens", + "value": 24 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "R9a1QHt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:19.586367+00:00", + "__module__": "datetime" + }, + "trace_id": "t-ZRvSMzTCudL6SB", + "type": "metric", + "unit": "tokens", + "value": 54 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load 
and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\ndf = pd.read_csv(\\\"\")\\nprint(df.head())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " error message indicates that the `bwrap.core", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "` module is not found. 
This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is because the `bwrap` module is not installed in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " your Python environment.\n\nTo fix this issue,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " you can use the `knowledge_search` function to describe the CSV", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file. 
This function can be used to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " search for information in a database, and it might have", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " access to information about the CSV file.\n\nHere is an example of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " how you can use the `knowledge_search` function to describe the CSV", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file:\n\n```\n{\n \"type\": \"function\",\n \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "name\": \"knowledge_search\",\n \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " {\n \"query\": \"Describe the CSV file at /var", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/folders/cz/vyh7y1", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "d11xg881lsxsshnc5c0000gn/T", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/tmpvto5j2dr/u8MQ2jywin", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "flation.csv\"\n }\n}\n```", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "9UjZne1U", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:15.341367+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 149 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "9UjZne1U", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:15.341380+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 188 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "9UjZne1U", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:15.341383+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 337 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": 
\"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\ndf = pd", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".read_csv(\"/var/folders/cz/vyh7y1", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "d11xg881lsxsshnc5c0000", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "gn/T/tmpvto5j2", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "dr/u8MQ2jywinflation.csv\")\nprint(df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".head())", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpvto5j2dr/u8MQ2jywinflation.csv\")\nprint(df.head())" + }, + "call_id": "ecc9db21-332f-4931-8820-cf139f8a0b88", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "6VEDipbd", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:14.030541+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "6VEDipbd", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:14.030577+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "6VEDipbd", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:14.030584+00:00", + "__module__": "datetime" + }, + "trace_id": "cOvUfJZLSK2vci9f", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", 
\"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If the file is too large to be uploaded, you can provide a sample of the csv file and I can help you describe it. \\n\\nHere is an example of how you can describe a csv file using pandas:\\n\\n```\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n# Print the first 5 rows of the data\\nprint(df.head())\\n# Print the last 5 rows of the data\\nprint(df.tail())\\n# Print the summary statistics of the data\\nprint(df.describe())\\n# Print the data types of each column\\nprint(df.dtypes)\\n# Print the number of missing values in each column\\nprint(df.isnull().sum())\\n```\\n\\nThis will give you an idea of what the csv file contains.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " code will create a line plot of the average yearly inflation over time", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". The x-axis represents the year and the y-axis represents the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " average inflation. 
Each point on the plot represents the average inflation for", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a particular year.\n\nPlease note that you need to replace 'in", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "flation.csv' with the actual path to your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " csv file. Also, this code assumes that the csv file has a", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " column named 'date' and another column named 'inflation'. 
If your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " csv file has different column names, you need to replace 'date' and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 'inflation' with the actual column names.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "Hm1BkrMQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:41.982115+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 636 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "Hm1BkrMQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:41.982147+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 126 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "Hm1BkrMQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:41.982153+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 762 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": 
\"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If the file is too large to be uploaded, you can provide a sample of the csv file and I can help you describe it. \\n\\nHere is an example of how you can describe a csv file using pandas:\\n\\n```\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n# Print the first 5 rows of the data\\nprint(df.head())\\n# Print the last 5 rows of the data\\nprint(df.tail())\\n# Print the summary statistics of the data\\nprint(df.describe())\\n# Print the data types of each column\\nprint(df.dtypes)\\n# Print the number of missing values in each column\\nprint(df.isnull().sum())\\n```\\n\\nThis will give you an idea of what the csv file contains.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": 
\"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data\ndf = pd.read_csv('inflation.csv')\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Convert date column to datetime\ndf['date'] = pd.to", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_datetime(df['date'])\n\n# Group by year and calculate average inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": 
null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\naverage_inflation = df.groupby(df['date'].dt.year", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ")['inflation'].mean()\n\n# Plot the time series\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".figure(figsize=(10,6))\nplt.plot(average_inflation", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".index, average_inflation.values, marker='o')\nplt.title", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "('Average Yearly Inflation')\nplt.xlabel('Year')\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".ylabel('Average Inflation')\nplt.grid(True)\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "plt.show()", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert date column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot the time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + }, + "call_id": "4849f8b5-bbb8-4c7e-8f19-498dd559dbe2", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "ZKjmS7HQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:30.999750+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 450 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "ZKjmS7HQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:30.999780+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": 
"meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "ZKjmS7HQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:32:30.999786+00:00", + "__module__": "datetime" + }, + "trace_id": "T857cf9QSamVBOAy", + "type": "metric", + "unit": "tokens", + "value": 460 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a remote server or a local machine, you can use the `pd.read_csv()` function to load the csv file. 
\\n\\nHere is an example:\\n\\n```python\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n# Print the first 5 rows of the dataframe\\nprint(df.head())\\n# Print the summary of the dataframe\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will print the first 5 rows of the dataframe, the summary of the dataframe (including the index dtype and column dtypes, non-nullable counts, and memory usage), and the descriptive statistics of the dataframe.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": 
\"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " code will create a line plot of the average yearly inflation over time.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " The x-axis represents the year and the y-axis represents the average inflation.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " The plot also includes a title, labels for the x and y axes,", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and a grid for better visibility.\n\nPlease note that you need to replace '", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "inflation.csv' with the actual path to", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " your csv file. 
Also, this code", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " assumes that the 'date' column in your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " csv file is in a format that can be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " parsed by pandas' `to_datetime` function. If your date", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " column is in a different format, you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " may need to specify the format using the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " `format` parameter of `to_datetime`.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": 
"meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "Yv7iXXNJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:50.214420+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 621 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "Yv7iXXNJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:50.214481+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 143 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "Yv7iXXNJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:50.214490+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 764 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a remote server or a local machine, you can use the `pd.read_csv()` function to load the csv file. 
\\n\\nHere is an example:\\n\\n```python\\nimport pandas as pd\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n# Print the first 5 rows of the dataframe\\nprint(df.head())\\n# Print the summary of the dataframe\\nprint(df.info())\\nprint(df.describe())\\n```\\n\\nThis will print the first 5 rows of the dataframe, the summary of the dataframe (including the index dtype and column dtypes, non-nullable counts, and memory usage), and the descriptive statistics of the dataframe.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + 
"__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\ndf = pd.read_csv('inflation.csv')\n\n# Convert", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 'date' column to datetime\ndf['date'] = pd.to", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_inflation = df.groupby(df['date'].dt.year)['inflation'].", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "mean()\n\n# Plot the time series\nplt.figure(figsize=(10,6", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + 
"value": "in_progress" + }, + "tool_call": "))\nplt.plot(average_inflation.index, average_inflation.values, marker", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".show()", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv('inflation.csv')\n\n# Convert 'date' column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot the time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + }, + "call_id": "62e5a10d-8a59-41e7-9f0e-87cabc7d15fa", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "dv6g9n2H", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:48.391101+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 433 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "dv6g9n2H", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:48.391113+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "dv6g9n2H", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:48.391116+00:00", + "__module__": "datetime" + }, + "trace_id": "srzTfsP6Sr-co-Ll", + "type": "metric", + "unit": "tokens", + "value": 443 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "It", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " seems that the file \"/var/folders/cz/vyh7", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "y1d11xg881lsxsshnc5c", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": 
"0000gn/T/tmpvto5j", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "2dr/JwKzVg", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "5Ainflation.csv\" does not exist. \n\nTo describe the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " csv file, you need to provide the actual file path or the file itself", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". If you are using a remote server or a local machine, you can", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " use the `pd.read_csv()` function to load", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the csv file. 
\n\nHere is an example:\n\n```", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "python\nimport pandas as pd\n# Load data\ndf =", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " pd.read_csv('inflation.csv')\n# Print the first 5 rows", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of the dataframe\nprint(df.head())\n# Print", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the summary of the dataframe\nprint(df.info())\nprint(df.describe", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "())\n```\n\nThis will print the first 5 rows of the dataframe", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", the summary of the dataframe", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " (including the index dtype and column dtypes, non-nullable", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " counts, and memory usage), and the descriptive statistics of the dataframe.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "qV1E8nPK", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:41.439164+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 215 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "qV1E8nPK", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:41.439188+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 216 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "qV1E8nPK", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:41.439190+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 431 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": 
\"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\n# Load data\ndf = pd.read", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_csv(\"/var/folders/cz/vyh7y1d11", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "xg881lsxsshnc5c0000gn/T/tmp", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + 
"__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "vto5j2dr/JwKzVg", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "5Ainflation.csv\")\n# Rows\nprint(\"Number", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " of rows and columns in the data:\", df.shape)\n# Columns", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\nprint(\"Columns of the data", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " are:\", len(df.columns))\n# Column names\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(\"Columns of the data are:\", df.columns)\n# Column dt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null 
+ }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "ypes\nprint(\"Datatype of the columns are:\", df", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".dtypes)", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmpvto5j2dr/JwKzVg5Ainflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)" + }, + "call_id": "87c3ef49-27e0-4561-ade3-83569a0fe236", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "9OTP08Yr", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:39.830624+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 36 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": 
"completion_tokens", + "span_id": "9OTP08Yr", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:39.830656+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "9OTP08Yr", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:39.830662+00:00", + "__module__": "datetime" + }, + "trace_id": "GG3oeA3qRH6WIf6Z", + "type": "metric", + "unit": "tokens", + "value": 46 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:61fc5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. 
This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:af027\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:d5787\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:af027\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:af027\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:af027\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:61fc5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:af027\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:d5787\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "knowledge_search\", \"parameters\": {\"query\": \"How", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to use LoRA in Torchtune\"}}", + "type": "text" + }, + 
"event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "14b82c7e-18d4-4b46-8f07-442be700e8ae", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "DBZOtUux", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:58.136315+00:00", + "__module__": "datetime" + }, + "trace_id": "XVSIgZRXR_aHBiAN", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "DBZOtUux", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:58.136380+00:00", + "__module__": "datetime" + }, + "trace_id": "XVSIgZRXR_aHBiAN", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "DBZOtUux", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:58.136387+00:00", + "__module__": "datetime" + }, + "trace_id": "XVSIgZRXR_aHBiAN", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:61fc5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:af027\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:d5787\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about Torchtune based on the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " documentation you provided. What's your first question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "gFK_4CQi", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:56.169962+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "gFK_4CQi", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:56.169995+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "gFK_4CQi", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:56.170001+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + 
"unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78970\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:8404f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:cbeb1\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:8404f\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:8404f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:8404f\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78970\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:8404f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:cbeb1\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search\", \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " {\"query\": \"How to use LoRA in Torchtune\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "dc7dd9e0-6ca1-452e-bb62-532a09e71848", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "1iT28abM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:53.948952+00:00", + "__module__": "datetime" + }, + "trace_id": "gd_zuJXnSaSfS3ZK", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "1iT28abM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:53.949001+00:00", + "__module__": "datetime" + }, + "trace_id": "gd_zuJXnSaSfS3ZK", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "1iT28abM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:53.949013+00:00", + "__module__": "datetime" + }, + "trace_id": "gd_zuJXnSaSfS3ZK", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78970\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:8404f\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:cbeb1\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about Torchtune based on", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the documentation you provided. What's your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " first question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "F3R1-xJM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:52.280696+00:00", + "__module__": "datetime" + }, + "trace_id": "7Do839YJRHC_ADjC", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "F3R1-xJM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:52.280743+00:00", + "__module__": "datetime" + }, + "trace_id": 
"7Do839YJRHC_ADjC", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "F3R1-xJM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:33:52.280778+00:00", + "__module__": "datetime" + }, + "trace_id": "7Do839YJRHC_ADjC", + "type": "metric", + "unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78a41\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:7b4a7\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:531f2\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:7b4a7\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:7b4a7\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:7b4a7\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78a41\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:7b4a7\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:531f2\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\", \"parameters\": {\"query\": \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "How to use LoRA in Torchtune\"}}", + "type": "text" + }, + 
"event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "721ea24f-be72-45fc-892c-aa7843f21ddf", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "VxsqbWot", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:42.471323+00:00", + "__module__": "datetime" + }, + "trace_id": "c_UJ92LEQciFQx3T", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "VxsqbWot", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:42.471354+00:00", + "__module__": "datetime" + }, + "trace_id": "c_UJ92LEQciFQx3T", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "VxsqbWot", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:42.471364+00:00", + "__module__": "datetime" + }, + "trace_id": "c_UJ92LEQciFQx3T", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:78a41\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:7b4a7\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:531f2\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Torchtune based on the documentation you provided. What's your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " first question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "V87G94tT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:40.786211+00:00", + "__module__": "datetime" + }, + "trace_id": "zdMkkXSDT0mK4qaK", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "V87G94tT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:40.786377+00:00", + "__module__": "datetime" + }, + "trace_id": 
"zdMkkXSDT0mK4qaK", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "V87G94tT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:40.786394+00:00", + "__module__": "datetime" + }, + "trace_id": "zdMkkXSDT0mK4qaK", + "type": "metric", + "unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:d341f\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:900f3\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:49640\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:900f3\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. 
code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:900f3\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:900f3\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. 
code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:d341f\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. 
Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:900f3\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. 
note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:49640\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. 
What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_search\", \"parameters\": {\"query", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\": \"How to use LoRA in Torchtune\"}}", + "type": "text" + }, + 
"event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "38c8de4c-95b1-44b6-a685-c153631305d1", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "t7U94vaX", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:07.491116+00:00", + "__module__": "datetime" + }, + "trace_id": "fM03LVqrT7ufMvUA", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "t7U94vaX", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:07.491187+00:00", + "__module__": "datetime" + }, + "trace_id": "fM03LVqrT7ufMvUA", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "t7U94vaX", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:07.491195+00:00", + "__module__": "datetime" + }, + "trace_id": "fM03LVqrT7ufMvUA", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:d341f\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:900f3\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:49640\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about Torchtune", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " based on the documentation you provided. What's your first question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "8iPkD4Fz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:05.798649+00:00", + "__module__": "datetime" + }, + "trace_id": "JlE9DKp_RnCewBUu", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "8iPkD4Fz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:05.798743+00:00", + "__module__": "datetime" + }, + "trace_id": "JlE9DKp_RnCewBUu", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "8iPkD4Fz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:34:05.798759+00:00", + "__module__": "datetime" + }, + "trace_id": "JlE9DKp_RnCewBUu", + "type": "metric", + 
"unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\": {\"query\": \"Torchtune documentation\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Torchtune documentation" + }, + "call_id": "b92c0200-4acb-4b6f-8ec7-2e2f993d6e1a", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "eANTdkZu", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:45.683600+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + "unit": "tokens", + "value": 39 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "eANTdkZu", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:45.683632+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "eANTdkZu", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:45.683639+00:00", + "__module__": "datetime" + }, + "trace_id": "A2oXFF9fRz2-Lc9N", + "type": "metric", + "unit": "tokens", + "value": 49 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", 
\"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Llama3-8B attention type\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:num-0\\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. 
note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet's say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \\\"role\\\": \\\"system\\\",\\n \\\"\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\", and \\\"output_proj\\\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\\\"q_proj\\\", \\\"v_proj\\\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Insert documents into memory\", \"parameters\": {}, \"tool_name\": \"insert_into_memory\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention type used by Llama3-8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "B is grouped-query attention.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "l8TIu3wW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:37.955798+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 80 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "l8TIu3wW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:37.955879+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 26 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "l8TIu3wW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:37.955889+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 106 + } + ] + } + } + ], + 
"type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Llama3-8B attention type\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. 
code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:num-0\\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet's say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \\\"role\\\": \\\"system\\\",\\n \\\"\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\", and \\\"output_proj\\\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\\\"q_proj\\\", \\\"v_proj\\\"])\\n\\n.. 
note::\\n\\n Calling :func:`lora_llama_2\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention type used by Llama3-8B is grouped-query attention.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "Ihnuyt_Y", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.902478+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", 
+ "unit": "tokens", + "value": 80 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "Ihnuyt_Y", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.902491+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", + "unit": "tokens", + "value": 26 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "Ihnuyt_Y", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.902493+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", + "unit": "tokens", + "value": 106 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Insert documents into memory\", \"parameters\": {}, \"tool_name\": \"insert_into_memory\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \"type\": \"function\",\n \"name\": \"knowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\",\n \"parameters\": {\n \"query\": \"Llama3-", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "8B attention type\"\n }\n}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "0af9e857-510d-4df8-872f-51b520578c22", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + 
"__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "b4C_3cNl", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:27.116730+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "b4C_3cNl", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:27.116756+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 48 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "b4C_3cNl", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:27.116762+00:00", + "__module__": "datetime" + }, + "trace_id": "rOU-VODXQUuIR6_p", + "type": "metric", + "unit": "tokens", + "value": 88 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\", \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "parameters\": {\"query\": \"Llama3-8B attention type", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "69cc8903-d256-40bb-aa1e-7f3935986e49", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "05SrG-G4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.286222+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "05SrG-G4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.286242+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "05SrG-G4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:24.286244+00:00", + "__module__": "datetime" + }, + "trace_id": "6eJM3WR0QsyIiMfg", + "type": "metric", + "unit": "tokens", + "value": 50 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"current CEO of Meta\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"{\\\"query\\\": \\\"current CEO of Meta\\\", \\\"top_k\\\": [{\\\"title\\\": \\\"Executives - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief 
Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\\\", \\\"score\\\": 0.8190992, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\\\", \\\"score\\\": 0.79099923, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Meet the Executive CSuite Team of Meta (Facebook) [2025]\\\", \\\"url\\\": \\\"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\\\", \\\"content\\\": \\\"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\\\", \\\"score\\\": 0.7602419, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) - Investopedia\\\", \\\"url\\\": \\\"https://www.investopedia.com/terms/m/mark-zuckerberg.asp\\\", \\\"content\\\": \\\"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg is a self-taught computer programmer and co-founder, chair, and chief executive officer of Meta (META), formerly known as Facebook. Mark Zuckerberg is a self-taught computer programmer and the co-founder, chair, and CEO of Meta (formerly Facebook). In April 2018, Zuckerberg testified on Capitol Hill about Facebook's use of users' information, including the sharing of 87 million users' information to Cambridge Analytica. 
Technically, Mark Zuckerberg makes a salary of $1 a year at Facebook. Booker Join With Facebook Founder and CEO Mark Zuckerberg to Advance a National Model for Improving Public Schools.\\\\\\\"\\\", \\\"score\\\": 0.74697095, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg - Forbes\\\", \\\"url\\\": \\\"https://www.forbes.com/profile/mark-zuckerberg/\\\", \\\"content\\\": \\\"Meta CEO Mark Zuckerberg \\\\u201cloved\\\\u201d an image on Facebook known as \\\\\\\"Challah Horse\\\\\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\\\", \\\"score\\\": 0.6410185, \\\"raw_content\\\": null}]}\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + 
"metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " current CEO of Meta is Mark Zuckerberg.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "HyrnM7Qp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:30.044240+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 1203 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "HyrnM7Qp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:30.044278+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 19 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "HyrnM7Qp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:30.044287+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 1222 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for 
information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "brave_search.call(query=\"current CEO of Meta\")", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "current CEO of Meta" + }, + "call_id": "a4d59df1-70b9-4f99-84ea-aa3a103b82ad", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "brave_search" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "jOaA28AT", + "timestamp": { + "__class__": "datetime", + "__datetime__": 
"2025-03-06T04:40:21.259444+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 34 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "jOaA28AT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:21.259478+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "jOaA28AT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:21.259485+00:00", + "__module__": "datetime" + }, + "trace_id": "7cHuamFcQay638rC", + "type": "metric", + "unit": "tokens", + "value": 44 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": \"get_boiling_point\", \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " function `get_boiling_point` is not able to find the boiling point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of polyjuice as it is a fictional liquid from the Harry Potter series", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". The function is only able to find the boiling point of real liquids.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "hmXLMi0u", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:14.642967+00:00", + "__module__": "datetime" + }, + "trace_id": "-Go8XWSYSRG2j2Ea", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "hmXLMi0u", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:14.642981+00:00", + "__module__": "datetime" + }, + "trace_id": "-Go8XWSYSRG2j2Ea", + "type": "metric", + "unit": "tokens", + "value": 56 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "hmXLMi0u", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:14.642984+00:00", + "__module__": 
"datetime" + }, + "trace_id": "-Go8XWSYSRG2j2Ea", + "type": "metric", + "unit": "tokens", + "value": 126 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + 
"type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " function `get_boiling_point` is", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " not able to find the boiling point of polyjuice as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " it is not a real liquid.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "ttsui3ip", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:53.513474+00:00", + "__module__": "datetime" + }, + "trace_id": "p1tRy8A3Q7KFFDLH", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "ttsui3ip", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:53.513507+00:00", + "__module__": "datetime" + }, + "trace_id": "p1tRy8A3Q7KFFDLH", + "type": "metric", + "unit": "tokens", + "value": 38 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "ttsui3ip", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:53.513514+00:00", + "__module__": "datetime" + }, + "trace_id": "p1tRy8A3Q7KFFDLH", + "type": "metric", + "unit": "tokens", + "value": 108 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"required\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " function `get_boiling_point` is not able to find the boiling point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": 
"ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of polyjuice as it is not a real liquid.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "nUJGFTmQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:07.133674+00:00", + "__module__": "datetime" + }, + "trace_id": "Xtf06INCSmyxkwGf", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "nUJGFTmQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:07.133708+00:00", + "__module__": "datetime" + }, + "trace_id": "Xtf06INCSmyxkwGf", + "type": "metric", + "unit": "tokens", + "value": 38 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "nUJGFTmQ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:07.133715+00:00", + "__module__": "datetime" + }, + "trace_id": "Xtf06INCSmyxkwGf", + "type": "metric", + "unit": "tokens", + "value": 108 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": \"get_boiling_point\", \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, 
\"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\", \"parameters\": {\"liquid_name\": \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "1e925ff5-d0b8-4b87-b3c3-a1a36f69626d", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "OG8Jlmhk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:10.868586+00:00", + "__module__": "datetime" + }, + "trace_id": "KgDQc2UfSrau2dZD", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "OG8Jlmhk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:10.868615+00:00", + "__module__": "datetime" + }, + "trace_id": "KgDQc2UfSrau2dZD", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "OG8Jlmhk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:10.868621+00:00", + "__module__": "datetime" + }, + "trace_id": "KgDQc2UfSrau2dZD", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, 
\"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_point\", \"parameters\": {\"liquid_name\": \"polyjuice", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + 
"__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "5721b667-748d-4e14-953c-ec67ad2aa152", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "mmWnwqPx", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:51.740989+00:00", + "__module__": "datetime" + }, + "trace_id": "i8h2T9ZHRMiTL0YG", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "mmWnwqPx", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:51.741006+00:00", + "__module__": "datetime" + }, + "trace_id": "i8h2T9ZHRMiTL0YG", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "mmWnwqPx", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:51.741009+00:00", + "__module__": "datetime" + }, + "trace_id": "i8h2T9ZHRMiTL0YG", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"none\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": 
[{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " couldn't find any information on the boiling point of Polyjuice. Polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice is a magical potion in the Harry Potter series that allows the drinker", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to transform into someone else. It's not a physical substance", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " with a boiling point. 
If you have", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " any other questions, I'd be happy to help.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "_CvLa4Gk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:09.509742+00:00", + "__module__": "datetime" + }, + "trace_id": "GUkufTl4SZSHCyBF", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "_CvLa4Gk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:09.509773+00:00", + "__module__": "datetime" + }, + "trace_id": "GUkufTl4SZSHCyBF", + "type": "metric", + "unit": "tokens", + "value": 73 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "_CvLa4Gk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:09.509780+00:00", + "__module__": "datetime" + }, + "trace_id": "GUkufTl4SZSHCyBF", + "type": "metric", + "unit": "tokens", + "value": 103 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"required\"}, 
\"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"parameters\": {\"liquid_name\": \"polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "7208784f-0e3f-4ae5-933b-7cc96b2d9375", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "MiP-_LQE", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:04.875000+00:00", + "__module__": "datetime" + }, + "trace_id": "3_z5Yy0wStST3JAm", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "MiP-_LQE", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:04.875027+00:00", + "__module__": "datetime" + }, + "trace_id": "3_z5Yy0wStST3JAm", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "MiP-_LQE", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:41:04.875032+00:00", + "__module__": "datetime" + }, + "trace_id": "3_z5Yy0wStST3JAm", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": 
\"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 100th prime number is 541.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "1eo6b4br", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:38.093912+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 251 + }, + { + "attributes": { + "model_id": 
"meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "1eo6b4br", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:38.093946+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 20 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "1eo6b4br", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:38.093956+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 271 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + 
"data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "def is_prime(n):\n if n <= 1:\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " return False\n if n <= 3", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ":\n return True\n if n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " % 2 == 0 or n % 3 ==", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " 0:\n return False\n i = 5\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " while i * i <= n:\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + 
"data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if n % i == 0 or n % (i", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " + 2) == 0:\n return False\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " i += 6\n return True\n\ndef get_nth_prime", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(n):\n count = 0\n num = 2\n while", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " True:\n if is_prime(num):\n count += 1\n if", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " count == n:\n return num\n num += 1\n\nprint", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(get_nth_prime(100))", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef get_nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(get_nth_prime(100))" + }, + "call_id": "6e8a3719-a151-4f66-bee2-416bb262b9ad", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "ONk3SjW9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:37.386737+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "ONk3SjW9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:37.386768+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "ONk3SjW9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:40:37.386775+00:00", + "__module__": "datetime" + }, + "trace_id": "PA3C-YQ-RtaWHr7k", + "type": "metric", + "unit": "tokens", + "value": 50 + } + ] + } + } + ], + "type": "generator" + }, + 
"[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked 
as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Per", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "plexity the company was founded in 2022.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + 
"event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "vFe6LmM2", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:18.095687+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 105 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "vFe6LmM2", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:18.095731+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 22 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "vFe6LmM2", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:18.095738+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 127 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National 
Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "_search\", \"parameters\": {\"query\": \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + 
"__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Perplexity company founding date\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "d631bb54-a82b-43c2-a2ad-cfb6f137a30c", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "o0vtaC1m", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:17.530116+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 67 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "o0vtaC1m", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:17.530143+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "o0vtaC1m", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:17.530149+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 104 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", 
\"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"knowledge_search\", \"parameters\": {\"query\": \"Perplexity", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " company founding date\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "fdd3b71b-9608-4e31-b2dc-4019d5732c9c", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "pP3mZKZI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:16.766858+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 29 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "pP3mZKZI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:16.766887+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "pP3mZKZI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:16.766890+00:00", + "__module__": "datetime" + }, + "trace_id": "1TSzhwWfQVaTaa-W", + "type": "metric", + "unit": "tokens", + "value": 39 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", 
\"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", 
\"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " NBA was created on August 3, 1949, with", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the merger of the Basketball Association of America (BAA) and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the National Basketball League (NBL).", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "2IUoADvp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.625791+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 103 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "2IUoADvp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.625819+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 45 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "2IUoADvp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.625827+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 148 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 
2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search\", \"parameters", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\": {\"query\": \"when was the nba created\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "when was the nba created" + }, + "call_id": "0c671028-deee-4ee8-95bd-5aec474c1ac9", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + 
"__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "bY3DnNes", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.197499+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 65 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "bY3DnNes", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.197531+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "bY3DnNes", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:20.197538+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 102 + } + ] + } + } + ], + "type": "generator" + }, + "[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}]]_{\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"parameters\": {\"query\": \"when was the nba created\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "when was the nba created" + }, + "call_id": "92a4755c-66e1-43bb-ac4b-cb63109591e7", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "_lkO0yBc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:19.550197+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 27 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "_lkO0yBc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:19.550227+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "_lkO0yBc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:42:19.550235+00:00", + "__module__": "datetime" + }, + "trace_id": "_7bSgNpLRmSbHN6U", + "type": "metric", + "unit": "tokens", + "value": 37 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. 
\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"false\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + 
"__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100 degrees Fahrenheit.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "ehKvLn9e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:07.946658+00:00", + "__module__": "datetime" + }, + "trace_id": "gYfhKRXmT0qqnh4V", + "type": "metric", + "unit": "tokens", + "value": 139 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "ehKvLn9e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:07.946690+00:00", + "__module__": "datetime" + }, + "trace_id": "gYfhKRXmT0qqnh4V", + "type": "metric", + "unit": "tokens", + "value": 23 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "ehKvLn9e", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:07.946698+00:00", + "__module__": "datetime" + }, + "trace_id": "gYfhKRXmT0qqnh4V", + "type": "metric", + "unit": "tokens", + "value": 162 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. 
\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"get_boiling_point\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "parameters\": {\"liquid_name\": \"polyju", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "ice\", \"celcius\": \"false\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "false", + "liquid_name": "polyjuice" + }, + "call_id": "ccb7e766-3cbd-4cd1-ac24-7d59fdbd32dd", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "f8N9xscj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:06.326554+00:00", + "__module__": "datetime" + }, + "trace_id": "pbTGwscoS2O-TOD7", + "type": "metric", + "unit": "tokens", + "value": 91 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "f8N9xscj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:06.326581+00:00", + "__module__": "datetime" + }, + "trace_id": "pbTGwscoS2O-TOD7", + "type": "metric", + "unit": "tokens", + "value": 45 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "f8N9xscj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:06.326587+00:00", + "__module__": "datetime" + }, + "trace_id": "pbTGwscoS2O-TOD7", + "type": "metric", + "unit": "tokens", + "value": 136 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": 
\"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant Always respond with tool calls no matter what. \", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Get the boiling point of polyjuice with a tool call.\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + 
"value": "in_progress" + }, + "tool_call": "_point\", \"parameters\": {\"liquid_name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"polyjuice\", \"celcius\": \"true\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "78adc0b9-cd6a-4052-b434-1db332fac11f", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "4ZGPgl-J", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:55.006558+00:00", + "__module__": "datetime" + }, + "trace_id": "0JdU31UqRW6uyUfy", + "type": "metric", + "unit": "tokens", + "value": 43 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "4ZGPgl-J", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:55.006570+00:00", + "__module__": "datetime" + }, + "trace_id": "0JdU31UqRW6uyUfy", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "4ZGPgl-J", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:55.006572+00:00", + "__module__": "datetime" + }, + "trace_id": "0JdU31UqRW6uyUfy", + "type": "metric", + "unit": "tokens", + "value": 53 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", 
+ "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\u00b0C.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "TRGdCKiq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.684993+00:00", + "__module__": "datetime" + }, + "trace_id": "yO1YOhixQ9mpO4rb", + "type": "metric", + "unit": "tokens", + "value": 85 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "TRGdCKiq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.685019+00:00", + "__module__": "datetime" + }, + "trace_id": "yO1YOhixQ9mpO4rb", + "type": "metric", + "unit": "tokens", + "value": 22 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "TRGdCKiq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.685025+00:00", + "__module__": "datetime" + }, + "trace_id": "yO1YOhixQ9mpO4rb", + "type": "metric", + "unit": "tokens", + "value": 107 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"celcius\": \"true\", \"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point_with_metadata\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point_with_metadata\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, 
\"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice is -100\u00b0C.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "lHrhiQgT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:39.714686+00:00", + "__module__": "datetime" + }, + "trace_id": "0jyTQ_JVTyO8Fz_O", + "type": "metric", + "unit": "tokens", + "value": 87 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "lHrhiQgT", + "timestamp": { + "__class__": "datetime", + 
"__datetime__": "2025-03-06T04:49:39.714720+00:00", + "__module__": "datetime" + }, + "trace_id": "0jyTQ_JVTyO8Fz_O", + "type": "metric", + "unit": "tokens", + "value": 22 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "lHrhiQgT", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:39.714727+00:00", + "__module__": "datetime" + }, + "trace_id": "0jyTQ_JVTyO8Fz_O", + "type": "metric", + "unit": "tokens", + "value": 109 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": 
"ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\", \"parameters\": {\"liquid_name\": \"polyjuice\", \"cel", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "cius\": \"true\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "ec5e1671-d607-46ae-804b-4f15e42e51b2", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "GbmO2wcg", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.172673+00:00", + "__module__": "datetime" + }, + "trace_id": "Fquzg9P5RfSrqSeH", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "GbmO2wcg", + "timestamp": { + 
"__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.172704+00:00", + "__module__": "datetime" + }, + "trace_id": "Fquzg9P5RfSrqSeH", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "GbmO2wcg", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:38.172712+00:00", + "__module__": "datetime" + }, + "trace_id": "Fquzg9P5RfSrqSeH", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Call get_boiling_point and answer What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point_with_metadata\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": 
{ + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\": \"get_boiling_point_with_metadata\", \"parameters\": {\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "liquid_name\": \"polyjuice\", \"celcius\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"true\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "celcius": "true", + "liquid_name": "polyjuice" + }, + "call_id": "1f6ad98b-871e-43fd-a866-53f54acb9466", + "tool_name": "get_boiling_point_with_metadata" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "gn-gDCYG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:39.300170+00:00", + "__module__": "datetime" + }, + "trace_id": "U3gRmVfKQK6UkwCL", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "gn-gDCYG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:39.300210+00:00", + "__module__": "datetime" + }, + "trace_id": "U3gRmVfKQK6UkwCL", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "gn-gDCYG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:39.300222+00:00", + "__module__": "datetime" + }, + "trace_id": "U3gRmVfKQK6UkwCL", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Give me a sentence that contains the word: hello\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": []}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " customer smiled and said \"hello\" to the 
friendly store clerk.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "V_N39zVn", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:05.597771+00:00", + "__module__": "datetime" + }, + "trace_id": "S-YEXTxAQyqX6Sbg", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "V_N39zVn", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:05.597811+00:00", + "__module__": "datetime" + }, + "trace_id": "S-YEXTxAQyqX6Sbg", + "type": "metric", + "unit": "tokens", + "value": 24 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "V_N39zVn", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:05.597818+00:00", + "__module__": "datetime" + }, + "trace_id": "S-YEXTxAQyqX6Sbg", + "type": "metric", + "unit": "tokens", + "value": 54 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": 
\"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m unable to run the code as I'm missing the `b", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "wrap.core` module. 
However, I can provide a general solution", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " for you.\n\nTo describe a CSV", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file, you can use the `pandas` library in Python.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Here's a general solution:\n\n1.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Import the `pandas` library.\n2. Load the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " CSV file using `pd.read_csv()`.\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "3. Print the first few rows of the dataframe using `df", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ".head()`.\n4. 
Print the data types of each", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " column using `df.dtypes`.\n5. Print the summary", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " statistics of the dataframe using `df.describe()`.\n\nThis will give", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " you a general idea of what the CSV file contains. If you", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " need more specific information, please let me know and I'll be", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " happy to help.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "uKno8S5o", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:19.978994+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 355 + }, + { + "attributes": { + "model_id": 
"meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "uKno8S5o", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:19.979047+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 166 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "uKno8S5o", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:19.979054+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 521 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": 
\"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport code_interpreter\n\n# Load the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " CSV file\ndf = pd.read_csv(\"/var/folders/c", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "z/vyh7y1d11xg881lsxssh", 
+ "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "nc5c0000gn/T/tmplr_wf0lb", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/Pl4Pewubinflation.csv\")\n\n# Print the first few", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " rows of the dataframe\nprint(df.head())\n\n# Print the data types of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " each column\nprint(df.dtypes)\n\n# Print the summary statistics of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dataframe\nprint(df.describe())", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + 
"__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport code_interpreter\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmplr_wf0lb/Pl4Pewubinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print the summary statistics of the dataframe\nprint(df.describe())" + }, + "call_id": "40ed30d4-05c7-4a7f-93b0-e1e6e43e48de", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "sz886Glf", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:18.831808+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 196 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "sz886Glf", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:18.831870+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "sz886Glf", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:18.831879+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 206 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv file, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", 
\"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport code_interpreter\n\n# Load the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": 
"llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " CSV file\ndf = pd.read_csv(\"/var/folders/cz", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/vyh7y1d11xg881", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "lsxsshnc5c0000gn/T/tmplr", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_wf0lb/Pl4Pewubinflation.csv\")\n\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Print the first few rows of the dataframe\nprint(df.head", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "())\n\n# Print the data types of each column\nprint(df.d", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + 
"delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "types)\n\n# Print the summary statistics of the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " dataframe\nprint(df.describe())", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport code_interpreter\n\n# Load the CSV file\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmplr_wf0lb/Pl4Pewubinflation.csv\")\n\n# Print the first few rows of the dataframe\nprint(df.head())\n\n# Print the data types of each column\nprint(df.dtypes)\n\n# Print the summary statistics of the dataframe\nprint(df.describe())" + }, + "call_id": "0a037488-ab9e-46e9-bdc4-7ee6f9ef0e1e", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "NoDjls_F", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:17.910457+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "NoDjls_F", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:17.910513+00:00", + "__module__": "datetime" + }, + "trace_id": 
"qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "NoDjls_F", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:17.910522+00:00", + "__module__": "datetime" + }, + "trace_id": "qchwuhR3TlCRLUu5", + "type": "metric", + "unit": "tokens", + "value": 47 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a local file, you can use the `load_data` function from the `code_interpreter` library to load the file. \\n\\nHere is an example of how you can describe the csv file:\\n\\n```\\nimport pandas as pd\\nfrom code_interpreter import load_data\\n\\n# Load data\\ndf = load_data('inflation.csv')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nPlease replace 'inflation.csv' with your actual csv file name. \\n\\nIf you are using a remote file, you need to provide the actual file path or the file itself. 
\\n\\nAlso, make sure that the file is in the correct format and that the pandas library can read it correctly.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv(\\\"inflation.csv\\\")\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "This", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " code will create a line plot of the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " average yearly inflation over time. The x", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "-axis represents the year and the y-axis represents the average inflation", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". 
The plot will also include a title, labels for the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " x and y axes, and a grid to make it easier", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to read.\n\nPlease replace \"inflation.csv\" with your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " actual csv file name. \n\nAlso, make sure that the file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " is in the correct format and that the pandas library can read it", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " correctly. \n\nIf your csv file has a different column name for", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the date, you will need to replace", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 'date' with the actual column name. 
\n\nIf your csv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file has a different column name for the inflation, you will need", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to replace 'inflation' with the actual column name. \n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "If you want to save the plot to a file instead of displaying", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " it, you can use the `savefig` method. 
For", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " example:\n\n```\nplt.savefig('average_inflation.png')\n```", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "2Yx8i0id", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:51.132007+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 666 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "2Yx8i0id", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:51.132048+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 200 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "2Yx8i0id", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:51.132054+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 866 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, 
\"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"It seems that the file \\\"\" does not exist. \\n\\nTo describe the csv file, you need to provide the actual file path or the file itself. If you are using a local file, you can use the `load_data` function from the `code_interpreter` library to load the file. \\n\\nHere is an example of how you can describe the csv file:\\n\\n```\\nimport pandas as pd\\nfrom code_interpreter import load_data\\n\\n# Load data\\ndf = load_data('inflation.csv')\\n\\n# Print summary of the data\\nprint(df.head()) # Print the first few rows of the data\\nprint(df.info()) # Print information about the data\\nprint(df.describe()) # Print summary statistics about the data\\n```\\n\\nPlease replace 'inflation.csv' with your actual csv file name. \\n\\nIf you are using a remote file, you need to provide the actual file path or the file itself. \\n\\nAlso, make sure that the file is in the correct format and that the pandas library can read it correctly.\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Plot average yearly inflation as a time series\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data\ndf = pd.read_csv(\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "inflation.csv\")\n\n# Convert date column to datetime\ndf", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "['date'] = pd.to_datetime(df['date'])\n\n# Group", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " by year and calculate 
average inflation\naverage_inflation = df.groupby", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "(df['date'].dt.year)['inflation'].mean()\n\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Plot average yearly inflation as a time series\nplt.figure(figsize=(", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "10,6))\nplt.plot(average_in", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "flation.index, average_inflation.values, marker='o')\nplt", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": ".title('Average Yearly Inflation')\nplt.xlabel('Year')\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + 
"parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "plt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "()", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\nimport matplotlib.pyplot as plt\n\n# Load data\ndf = pd.read_csv(\"inflation.csv\")\n\n# Convert date column to datetime\ndf['date'] = pd.to_datetime(df['date'])\n\n# Group by year and calculate average inflation\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\n\n# Plot average yearly inflation as a time series\nplt.figure(figsize=(10,6))\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\nplt.title('Average Yearly Inflation')\nplt.xlabel('Year')\nplt.ylabel('Average Inflation')\nplt.grid(True)\nplt.show()" + }, + "call_id": "cfae3ff5-49f8-439d-b740-603bc93fb5a3", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "JNrmlTTc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:39.920493+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 476 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + 
"span_id": "JNrmlTTc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:39.920519+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "JNrmlTTc", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:39.920522+00:00", + "__module__": "datetime" + }, + "trace_id": "N2BeNv66RcO7NRuE", + "type": "metric", + "unit": "tokens", + "value": 486 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", 
\"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "It", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " seems that the file \"/var/folders", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "/cz/vyh7y1d11xg881lsx", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "sshnc5c0000gn/T/t", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "mplr_wf0lb/p99E", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "7wY2inflation.csv\" does not exist. 
\n\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "To describe the csv file, you need to provide the actual file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " path or the file itself. If you are using a local file", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ", you can use the `load_data` function from the `", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "code_interpreter` library to load the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " file. 
\n\nHere is an example of how you can describe", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the csv file:\n\n```\nimport pandas as", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " pd\nfrom code_interpreter import load_data\n\n# Load data", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\ndf = load_data('inflation.csv')\n\n# Print summary of", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the data\nprint(df.head()) #", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Print the first few rows of the data\nprint(df.info())", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " # Print information about the data\nprint(df.describe()) ", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " # Print summary statistics about the data\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "```\n\nPlease replace 'inflation.csv", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "' with your actual csv file name.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \n\nIf you are using a remote file, you need to provide", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the actual file path or the file itself.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \n\nAlso, make sure that the file is in the correct format", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " and that the pandas library can read it correctly.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "rE7rhw1s", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:30.946947+00:00", + "__module__": "datetime" + }, + "trace_id": 
"RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 213 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "rE7rhw1s", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:30.946979+00:00", + "__module__": "datetime" + }, + "trace_id": "RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 261 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "rE7rhw1s", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:30.946982+00:00", + "__module__": "datetime" + }, + "trace_id": "RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 474 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Here is a csv, can you describe it?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"# User provided a file accessible to you at \\\"\"\\nYou can use code_interpreter to load and inspect it.\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + 
"parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "import pandas as pd\n# Load data\ndf = pd.read", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "_csv(\"/var/folders/cz/vyh7y1d", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "11xg881lsxsshnc5c0000gn/T", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "/tmplr_wf0lb/p99E7wY2", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "inflation.csv\")\n#", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": 
{ + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " Rows\nprint(\"Number of rows and columns in the", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " data:\", df.shape)\n# Columns\nprint(\"Columns of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the data are:\", len(df.columns))\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "# Column names\nprint(\"Columns of", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " the data are:\", df.columns)\n# Column dtypes\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "print(\"Datatype of the columns are:\", df.dtypes)", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "code": "import pandas as pd\n# Load data\ndf = pd.read_csv(\"/var/folders/cz/vyh7y1d11xg881lsxsshnc5c0000gn/T/tmplr_wf0lb/p99E7wY2inflation.csv\")\n# Rows\nprint(\"Number of rows and columns in the data:\", df.shape)\n# Columns\nprint(\"Columns of the data are:\", len(df.columns))\n# Column names\nprint(\"Columns of the data are:\", df.columns)\n# Column dtypes\nprint(\"Datatype of the columns are:\", df.dtypes)" + }, + "call_id": "1db58db0-92c5-4e65-8e83-631bef020ef4", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "W_qnYIUI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:29.106322+00:00", + "__module__": "datetime" + }, + "trace_id": "RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 36 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "W_qnYIUI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:29.106333+00:00", + "__module__": "datetime" + }, + "trace_id": "RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "W_qnYIUI", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:29.106336+00:00", + "__module__": "datetime" + }, + "trace_id": "RPZJ19J7SzaX6t6h", + "type": "metric", + "unit": "tokens", + "value": 46 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:8c1f5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:f9c19\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:13786\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:8c1f5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. 
For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. 
_glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:f9c19\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. 
-----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "name\": \"knowledge_search\", \"parameters", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\": {\"query\": \"How to use LoRA in Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "7815c1ab-fbdf-42e8-84a7-b1f74f67d863", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, 
+ "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "KM-vILDG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:01.270069+00:00", + "__module__": "datetime" + }, + "trace_id": "NIVx0ka-TmKDiZaU", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "KM-vILDG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:01.270143+00:00", + "__module__": "datetime" + }, + "trace_id": "NIVx0ka-TmKDiZaU", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "KM-vILDG", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:01.270151+00:00", + "__module__": "datetime" + }, + "trace_id": "NIVx0ka-TmKDiZaU", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:8c1f5\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. 
code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:13786\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. 
Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:f9c19\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. 
_glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about Torcht", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "une based on the documentation you provided. 
What's your first", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "5yc3Hts6", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:59.857021+00:00", + "__module__": "datetime" + }, + "trace_id": "6KRztpbwTwquLEUn", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "5yc3Hts6", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:59.857048+00:00", + "__module__": "datetime" + }, + "trace_id": "6KRztpbwTwquLEUn", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "5yc3Hts6", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:59.857055+00:00", + "__module__": "datetime" + }, + "trace_id": "6KRztpbwTwquLEUn", + "type": "metric", + "unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b222e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:1b69d\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:deca9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. 
code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"How to use LoRA in Torchtune\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:1b69d\\nContent: .. _lora_finetune_label:\\n\\n============================\\nFine-Tuning Llama2 with LoRA\\n============================\\n\\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\\n See :ref:`below` for how to do this.\\n\\nLet's inspect each of these models a bit more closely.\\n\\n.. code-block:: bash\\n\\n # Print the first layer's self-attention in the usual Llama2 model\\n >>> print(base_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\\n (pos_embeddings): RotaryPositionalEmbeddings()\\n )\\n\\n # Print the same for Llama2 with LoRA weights\\n >>> print(lora_model.layers[0].attn)\\n MultiHeadAttention(\\n (q_proj): LoRALinear(\\n (dropout): Dropout(p=0.0, inplace=False)\\n \\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:1b69d\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. 
See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:1b69d\\nContent: from our Llama2\\nmodel without any wrappers or custom checkpoint conversion logic.\\n\\n.. code-block:: python\\n\\n # Assuming that base_model already has the pretrained Llama2 weights,\\n # this will directly load them into your LoRA model without any conversion necessary.\\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\\n\\n.. note::\\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\\n :func:`validate_missing_and_unexpected_for_lora() `.\\n\\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\\n\\n.. _setting_trainable_params:\\n\\n.. code-block:: python\\n\\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\\n\\n # Fetch all params from the model that are associated with LoRA.\\n lora_params = get_adapter_params(lora_model)\\n\\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\\n set_trainable_params(lora_model, lora_params)\\n\\n # Print the total number of parameters\\n total_params = sum([p.numel() for p in lora_model.parameters()])\\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\\n print(\\n f\\\"\\\"\\\"\\n {total_params} total params,\\n {trainable_params}\\\" trainable params,\\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\\n \\\"\\\"\\\"\\n )\\n\\n 6742609920 total params,\\n 4194304 trainable params,\\n 0.06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b222e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. 
For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. 
_glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:1b69d\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. _lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:deca9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. 
-----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"I'm ready to help you answer questions about Torchtune based on the documentation you provided. What's your first question?\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": []}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Tell me how to use LoRA\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search\", \"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "parameters\": {\"query\": \"How to use LoRA in Tor", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "chtune\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "How to use LoRA in Torchtune" + }, + "call_id": "c92271a7-37e2-4396-aa7f-5805b9273a71", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + 
"__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "Z6HS-lIg", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:08.648346+00:00", + "__module__": "datetime" + }, + "trace_id": "1NwedpozRqOVQXRs", + "type": "metric", + "unit": "tokens", + "value": 117 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "Z6HS-lIg", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:08.648375+00:00", + "__module__": "datetime" + }, + "trace_id": "1NwedpozRqOVQXRs", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "Z6HS-lIg", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:08.648382+00:00", + "__module__": "datetime" + }, + "trace_id": "1NwedpozRqOVQXRs", + "type": "metric", + "unit": "tokens", + "value": 157 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Torchtune documentation\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:b222e\\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\\n``conversation_column`` and ``conversation_style``. Our data follows the ``\\\"sharegpt\\\"`` format, so\\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\\nlook like so:\\n\\n.. code-block:: python\\n\\n from torchtune.datasets import chat_dataset\\n from torchtune.models.llama3 import llama3_tokenizer\\n\\n tokenizer = llama3_tokenizer(\\\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\\")\\n ds = chat_dataset(\\n tokenizer=tokenizer,\\n source=\\\"json\\\",\\n data_files=\\\"data/my_data.json\\\",\\n split=\\\"train\\\",\\n conversation_column=\\\"dialogue\\\",\\n conversation_style=\\\"sharegpt\\\",\\n )\\n\\n.. 
code-block:: yaml\\n\\n # In config\\n tokenizer:\\n _component_: torchtune.models.llama3.llama3_tokenizer\\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\\n\\n dataset:\\n _component_: torchtune.datasets.chat_dataset\\n source: json\\n data_files: data/my_data.json\\n split: train\\n conversation_column: dialogue\\n conversation_style: sharegpt\\n\\n.. note::\\n You can pass in any keyword argument for `load_dataset `_ into all our\\n Dataset classes and they will honor them. This is useful for common parameters\\n such as specifying the data split with :code:`split` or configuration with\\n :code:`name`\\n\\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\\nIf you already know what LoRA is and want to get straight to running\\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\\n\\n.. grid:: 2\\n\\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\\n\\n * What LoRA is and how it saves memory during finetuning\\n * An overview of LoRA components in torchtune\\n * How to run a LoRA finetune using torchtune\\n * How to experiment with different LoRA configurations\\n\\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\\n\\n * Be familiar with :ref:`torchtune`\\n * Make sure to :ref:`install torchtune`\\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\\n\\nWhat is LoRA?\\n-------------\\n\\n`LoRA `_ is an adapter-based method for\\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\\ntransformer models, in which case it is common to add the low-rank matrices\\nto some of the linear projections in each transformer layer's self-attention.\\n\\n.. note::\\n\\n If you're unfamiliar, check out these references for the `definition of rank `_\\n and discussion of `low-rank approximations `_.\\n\\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\\nyou can expect to see memory savings due to a substantial reduction in the\\nnumber of parameters with gradients. When using an optimizer with momentum,\\nlike `AdamW `.\\n.. .. _glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:1b69d\\nContent: 06% of all params are trainable.\\n\\n.. note::\\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\\n of in the recipe.\\n\\n\\n.. 
_lora_recipe_label:\\n\\nLoRA finetuning recipe in torchtune\\n-----------------------------------\\n\\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\\n\\n.. code-block:: bash\\n\\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\\n\\n.. note::\\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\\n or by directly modifying the :code:`7B_lora.yaml` file. See our \\\"\\\":ref:`config_tutorial_label`\\\" recipe\\n for more details on how you can easily clone and modify torchtune configs.\\n\\n.. note::\\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\\n and (b) the memory constraints of your hardware.\\n\\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\\n\\n.. code-block:: yaml\\n\\n # Model Arguments\\n model:\\n _component_: lora_llama2_7b\\n lora_attn_modules: ['q_proj', 'v_proj']\\n lora_rank: 8\\n lora_alpha: 16\\n ...\\n\\nWe see that the\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:deca9\\nContent: etune\\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.use_dora=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n use_dora: True\\n\\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\\neven more memory savings!\\n\\n.. code-block:: bash\\n\\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\\\\n model.apply_lora_to_mlp=True \\\\\\n model.lora_attn_modules=[\\\"q_proj\\\",\\\"k_proj\\\",\\\"v_proj\\\"] \\\\\\n model.lora_rank=16 \\\\\\n model.lora_alpha=32 \\\\\\n model.use_dora=True \\\\\\n model.quantize_base=True\\n\\n.. code-block:: yaml\\n\\n model:\\n _component_: torchtune.models.lora_llama3_8b\\n apply_lora_to_mlp: True\\n lora_attn_modules: [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\"]\\n lora_rank: 16\\n lora_alpha: 32\\n use_dora: True\\n quantize_base: True\\n\\n\\n.. note::\\n\\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\\n\\n.. _glossary_distrib:\\n\\n\\n.. TODO\\n\\n.. Distributed\\n.. -----------\\n\\n.. .. _glossary_fsdp:\\n\\n.. Fully Sharded Data Parallel (FSDP)\\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\\n\\n.. All our ``_distributed`` recipes use `FSDP `.\\n.. .. 
_glossary_fsdp2:\\n\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'m ready to help you answer questions about", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Torchtune based on the documentation you provided. 
What's your", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " first question?", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "o33PSCts", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:07.268876+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 75 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "o33PSCts", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:07.268906+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 35 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "o33PSCts", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:07.268914+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 110 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"I am attaching some documentation for Torchtune. 
Help me answer questions I will ask next.\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\": {\"query\": \"Torchtune documentation\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": 
"llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Torchtune documentation" + }, + "call_id": "26bf5efc-c1da-4229-86d9-853f45d3a0f6", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "UUPCfOjW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:06.661392+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 39 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "UUPCfOjW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:06.661422+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "UUPCfOjW", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:06.663497+00:00", + "__module__": "datetime" + }, + "trace_id": "edTwKHK5Q4K8yCqt", + "type": "metric", + "unit": "tokens", + "value": 49 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Llama3-8B attention type\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": 
[{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:num-0\\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. 
_prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet's say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \\\"role\\\": \\\"system\\\",\\n \\\"\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\", and \\\"output_proj\\\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\\\"q_proj\\\", \\\"v_proj\\\"])\\n\\n.. note::\\n\\n Calling :func:`lora_llama_2\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Insert documents into memory\", \"parameters\": {}, \"tool_name\": \"insert_into_memory\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention type used", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " by Llama3-8B is grouped-query attention.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "qzbGsIc-", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:56.822860+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 80 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "qzbGsIc-", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:56.822890+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 26 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "qzbGsIc-", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:56.822897+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 106 + } + ] + } + } + ], + 
"type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Llama3-8B attention type\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:num-1\\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\\nof models across a `range of different benchmarks `_.\\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\\nThere are a few main changes between Llama2-7B and Llama3-8B models:\\n\\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:num-1\\nContent: instead of 32,000 from Llama2 models)\\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\\n\\n|\\n\\nGetting access to Llama3-8B-Instruct\\n------------------------------------\\n\\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\\non the `official Meta page `_ to gain access to the model.\\nNext, make sure you grab your Hugging Face token from `here `_.\\n\\n\\n.. code-block:: bash\\n\\n tune download meta-llama/Meta-Llama-3\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:num-0\\nContent: :`download Llama3 Instruct weights `\\n\\n\\nTemplate changes from Llama2 to Llama3\\n--------------------------------------\\n\\nThe Llama2 chat model requires a specific template when prompting the pre-trained\\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\\ninference on the model, you'll need to use the same template for optimal performance\\non chat data. Otherwise, the model will just perform standard text completion, which\\nmay or may not align with your intended use case.\\n\\nFrom the `official Llama2 prompt\\ntemplate guide `_\\nfor the Llama2 chat model, we can see that special tags are added:\\n\\n.. 
code-block:: text\\n\\n [INST] <>\\n You are a helpful, respectful, and honest assistant.\\n <>\\n\\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \\n\\nLlama3 Instruct `overhauled `\\n\", \"type\": \"text\"}, {\"text\": \"Result 4:\\nDocument_id:num-0\\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\\n\\nThe tags are entirely different, and they are actually encoded differently than in\\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\\nLlama3 template to understand how.\\n\\n.. note::\\n The Llama3 Base model uses a `different prompt template\\n `_ than Llama3 Instruct\\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\\n template for optimal performance. Generally, for instruct and chat data, we recommend using\\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\\n Llama3 Instruct.\\n\\n.. _prompt_template_vs_special_tokens:\\n\\nTokenizing prompt templates & special tokens\\n--------------------------------------------\\n\\nLet's say I have a sample of a single user-assistant turn accompanied with a system\\nprompt:\\n\\n.. code-block:: python\\n\\n sample = [\\n {\\n \\\"role\\\": \\\"system\\\",\\n \\\"\\n\", \"type\": \"text\"}, {\"text\": \"Result 5:\\nDocument_id:num-3\\nContent: LoRA to Llama2 models\\n------------------------------\\n\\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\\n\\n.. code-block:: python\\n\\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\\n\\n # Build Llama2 without any LoRA layers\\n base_model = llama2_7b()\\n\\n # The default settings for lora_llama2_7b will match those for llama2_7b\\n # We just need to define which layers we want LoRA applied to.\\n # Within each self-attention, we can choose from [\\\"q_proj\\\", \\\"k_proj\\\", \\\"v_proj\\\", and \\\"output_proj\\\"].\\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\\n # layers outside of the self-attention.\\n lora_model = lora_llama2_7b(lora_attn_modules=[\\\"q_proj\\\", \\\"v_proj\\\"])\\n\\n.. 
note::\\n\\n Calling :func:`lora_llama_2\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " attention type used by Llama3-8", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "B is grouped-query attention.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": 
"llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "WbLMJeWt", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:43.468600+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 80 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "WbLMJeWt", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:43.468641+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 26 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "WbLMJeWt", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:43.468649+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 106 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Insert documents into memory\", \"parameters\": {}, \"tool_name\": \"insert_into_memory\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\n", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " \"type\": \"function\",\n \"name\": \"knowledge_search", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\",\n \"parameters\": {\n \"query\": \"Llama3-", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "8B attention type\"\n }\n}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "50f2c13d-14c1-417e-bc85-89e23afab120", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + 
"__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "5I5ujhpm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:45.629100+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "5I5ujhpm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:45.629127+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 48 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "5I5ujhpm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:45.629133+00:00", + "__module__": "datetime" + }, + "trace_id": "5LMJTs_wRBiwAPaF", + "type": "metric", + "unit": "tokens", + "value": 88 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Instead of the standard multi-head attention, what attention type does Llama3-8B use?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. 
Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"knowledge_search\", \"parameters\": {\"query\": \"Llama", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "3-8B attention type\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Llama3-8B attention type" + }, + "call_id": "70b24279-f0ed-49cc-ab4f-9bd3d7af9554", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "9GrKkBwq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:39.870328+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "9GrKkBwq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:39.870341+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "9GrKkBwq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:39.870347+00:00", + "__module__": "datetime" + }, + "trace_id": "ISGpsBHRTjG_DfWw", + "type": "metric", + "unit": "tokens", + "value": 50 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"current CEO of Meta\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"{\\\"query\\\": \\\"current CEO of Meta\\\", \\\"top_k\\\": [{\\\"title\\\": \\\"Executives - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\\\u2018Boz\\\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief 
Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\\\", \\\"score\\\": 0.8190992, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\\\", \\\"url\\\": \\\"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\\\", \\\"content\\\": \\\"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\\\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\\\u00a9 2025 Meta\\\", \\\"score\\\": 0.79099923, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Meet the Executive CSuite Team of Meta (Facebook) [2025]\\\", \\\"url\\\": \\\"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\\\", \\\"content\\\": \\\"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\\\u2019s finance and facilities team to keep track of the company\\\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\\\", \\\"score\\\": 0.7602419, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) - Investopedia\\\", \\\"url\\\": \\\"https://www.investopedia.com/terms/m/mark-zuckerberg.asp\\\", \\\"content\\\": \\\"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg is a self-taught computer programmer and co-founder, chair, and chief executive officer of Meta (META), formerly known as Facebook. Mark Zuckerberg is a self-taught computer programmer and the co-founder, chair, and CEO of Meta (formerly Facebook). In April 2018, Zuckerberg testified on Capitol Hill about Facebook's use of users' information, including the sharing of 87 million users' information to Cambridge Analytica. 
Technically, Mark Zuckerberg makes a salary of $1 a year at Facebook. Booker Join With Facebook Founder and CEO Mark Zuckerberg to Advance a National Model for Improving Public Schools.\\\\\\\"\\\", \\\"score\\\": 0.74697095, \\\"raw_content\\\": null}, {\\\"title\\\": \\\"Mark Zuckerberg - Forbes\\\", \\\"url\\\": \\\"https://www.forbes.com/profile/mark-zuckerberg/\\\", \\\"content\\\": \\\"Meta CEO Mark Zuckerberg \\\\u201cloved\\\\u201d an image on Facebook known as \\\\\\\"Challah Horse\\\\\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\\\", \\\"score\\\": 0.6410185, \\\"raw_content\\\": null}]}\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + 
"metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " current CEO of Meta is Mark Zuckerberg.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "LWwngTMJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:24.889991+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 1203 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "LWwngTMJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:24.890015+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 19 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "LWwngTMJ", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:24.890017+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 1222 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Search the web and tell me who the current CEO of Meta is.\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for 
information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "brave_search.call(query=\"current CEO of Meta\")", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "current CEO of Meta" + }, + "call_id": "f84788f5-ef46-4e13-aa57-3ea4ecb223c1", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "brave_search" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "tWTHAFOr", + "timestamp": { + "__class__": "datetime", + "__datetime__": 
"2025-03-06T04:47:17.453332+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 34 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "tWTHAFOr", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:17.453359+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "tWTHAFOr", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:17.453365+00:00", + "__module__": "datetime" + }, + "trace_id": "K0psyd28TdSkb8LK", + "type": "metric", + "unit": "tokens", + "value": 44 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": \"get_boiling_point\", \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": 
"llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " function `get_boiling_point` is not able to find the boiling point", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " of polyjuice as it is a fictional liquid from the Harry Potter series", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": ". The function is only able", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to find the boiling point of real liquids.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "ZFinp6U7", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:30.079245+00:00", + "__module__": "datetime" + }, + "trace_id": "mUx8OGhtSEW1DSOB", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "ZFinp6U7", + "timestamp": { + "__class__": "datetime", + "__datetime__": 
"2025-03-06T04:48:30.079279+00:00", + "__module__": "datetime" + }, + "trace_id": "mUx8OGhtSEW1DSOB", + "type": "metric", + "unit": "tokens", + "value": 56 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "ZFinp6U7", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:30.079284+00:00", + "__module__": "datetime" + }, + "trace_id": "mUx8OGhtSEW1DSOB", + "type": "metric", + "unit": "tokens", + "value": 126 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + 
"delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " function `get_boiling_point` is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " able to find the boiling point of poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice as it is not a real liquid.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "JtmG7Qaq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:53.738043+00:00", + "__module__": "datetime" + }, + "trace_id": "g2nkdPGEQ_KS9-qQ", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "JtmG7Qaq", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:53.738072+00:00", + "__module__": "datetime" + }, + "trace_id": "g2nkdPGEQ_KS9-qQ", + "type": "metric", + "unit": "tokens", + "value": 38 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "JtmG7Qaq", + "timestamp": { + "__class__": "datetime", + "__datetime__": 
"2025-03-06T04:47:53.738079+00:00", + "__module__": "datetime" + }, + "trace_id": "g2nkdPGEQ_KS9-qQ", + "type": "metric", + "unit": "tokens", + "value": 108 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"liquid_name\": \"polyjuice\"}, \"call_id\": \"\", \"tool_name\": \"get_boiling_point\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"-100\", \"role\": \"tool\", \"tool_name\": \"get_boiling_point\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"required\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + 
"event": { + "delta": { + "text": " function `get_boiling_point` is not able to find the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " boiling point of polyjuice as it is not", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " a real liquid.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "hyoRl-YH", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:15.559044+00:00", + "__module__": "datetime" + }, + "trace_id": "pHT6bhi3THO6qYi9", + "type": "metric", + "unit": "tokens", + "value": 70 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "hyoRl-YH", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:15.559075+00:00", + "__module__": "datetime" + }, + "trace_id": "pHT6bhi3THO6qYi9", + "type": "metric", + "unit": "tokens", + "value": 38 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "hyoRl-YH", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:15.559082+00:00", + "__module__": "datetime" + }, + "trace_id": "pHT6bhi3THO6qYi9", + "type": "metric", + "unit": "tokens", + "value": 108 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, 
\"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": \"get_boiling_point\", \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"parameters\": {\"liquid_name\": \"polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + 
"tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "ae161bf4-6f03-4830-8f08-3999d20c066a", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "HLJCauvN", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:28.686660+00:00", + "__module__": "datetime" + }, + "trace_id": "3uSIGGP2TcatIhQ7", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "HLJCauvN", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:28.686691+00:00", + "__module__": "datetime" + }, + "trace_id": "3uSIGGP2TcatIhQ7", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "HLJCauvN", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:28.686695+00:00", + "__module__": "datetime" + }, + "trace_id": "3uSIGGP2TcatIhQ7", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": 
\"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search the web for information\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"brave_search\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "get_boiling_point\", \"parameters\": {\"liquid_name\": \"poly", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "juice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + 
"value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "c8369271-9c41-4787-b5a7-0280822f3732", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "Ta9THPS8", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:52.569263+00:00", + "__module__": "datetime" + }, + "trace_id": "W6rZ8mwBRRu661Ox", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "Ta9THPS8", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:52.569291+00:00", + "__module__": "datetime" + }, + "trace_id": "W6rZ8mwBRRu661Ox", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "Ta9THPS8", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:52.569297+00:00", + "__module__": "datetime" + }, + "trace_id": "W6rZ8mwBRRu661Ox", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": 
\"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"none\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " couldn't find any information on the boiling point of Poly", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "juice. Polyjuice is a magical potion in the", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " Harry Potter series that allows the drinker", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " to transform into someone else. 
It", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'s not a physical substance with a boiling point.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " If you have any other questions, I", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "'d be happy to help.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "FRDVTn1V", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:17.228586+00:00", + "__module__": "datetime" + }, + "trace_id": "3GXhBV5vSn2cf6Pi", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "FRDVTn1V", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:17.228639+00:00", + "__module__": "datetime" + }, + "trace_id": "3GXhBV5vSn2cf6Pi", + "type": "metric", + "unit": "tokens", + "value": 73 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "FRDVTn1V", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:17.228647+00:00", + "__module__": "datetime" + }, + "trace_id": "3GXhBV5vSn2cf6Pi", + "type": "metric", + "unit": "tokens", + "value": 103 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": 
\"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"What is the boiling point of polyjuice?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"required\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Returns the boiling point of a liquid in Celcius or Fahrenheit\", \"parameters\": {\"celcius\": {\"default\": true, \"description\": \"Whether to return the boiling point in Celcius\", \"param_type\": \"bool\", \"required\": false}, \"liquid_name\": {\"default\": null, \"description\": \"The name of the liquid\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"get_boiling_point\"}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"get_boiling_point\",", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"parameters\": {\"liquid_name\": \"polyjuice\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "liquid_name": "polyjuice" + }, + "call_id": "63bb757c-e433-4e14-b527-6989b7ae6582", + "tool_name": "get_boiling_point" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "j1OaNojM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:09.337637+00:00", + "__module__": "datetime" + }, + "trace_id": "ZAeUlaWpRVSas5hb", + "type": "metric", + "unit": "tokens", + "value": 30 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "j1OaNojM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:09.337664+00:00", + "__module__": "datetime" + }, + "trace_id": "ZAeUlaWpRVSas5hb", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "j1OaNojM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:48:09.337668+00:00", + "__module__": "datetime" + }, + "trace_id": "ZAeUlaWpRVSas5hb", + "type": "metric", + "unit": "tokens", + "value": 40 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return 
False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))\"}, \"call_id\": \"\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": \"completed\\n[stderr]\\nTraceback (most recent call last):\\n line 5, in \\n from bwrap.core import main\\nModuleNotFoundError: No module named 'bwrap.core'\\n[/stderr]\", \"role\": \"tool\", \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " 100th prime number is 541.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + 
"__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "uwED-DA9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:27.524949+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 251 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "uwED-DA9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:27.524984+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 20 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "uwED-DA9", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:27.524991+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 271 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"Write code and execute it to find the answer for: What is the 100th prime number?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "def is_prime(n):\n if n <= 1:\n return False\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if n <= 3:\n return True\n if n % ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "2 == 0 or n % 3 == 0:\n return False", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n i = 5\n while i * i <= n:\n ", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " if n % i == 0 or n % (i + 2)", + "type": "tool_call" + }, + 
"event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " == 0:\n return False", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "\n", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " i += 6\n return True\n\ndef get_nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " == n:\n return num\n num", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " += 1\n\nprint(get_nth_prime(100))", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" 
+ }, + "tool_call": { + "arguments": { + "code": "def is_prime(n):\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\ndef get_nth_prime(n):\n count = 0\n num = 2\n while True:\n if is_prime(num):\n count += 1\n if count == n:\n return num\n num += 1\n\nprint(get_nth_prime(100))" + }, + "call_id": "297a9d9d-daaf-4d90-9496-2648a659aa27", + "tool_name": { + "__enum__": "BuiltinTool", + "__module__": "llama_stack.models.llama.datatypes", + "value": "code_interpreter" + } + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "LfE6srhj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:26.949350+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 40 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "LfE6srhj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:26.949380+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "LfE6srhj", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:47:26.949386+00:00", + "__module__": "datetime" + }, + "trace_id": "04_0VtRzTY-hrOyG", + "type": "metric", + "unit": "tokens", + "value": 50 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, 
{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", 
\"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "Per", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "plexity the company was founded in 2022.", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "25plHusk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.915838+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 105 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + 
"span_id": "25plHusk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.915878+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 22 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "25plHusk", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.915886+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 127 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"Perplexity company founding date\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": 
\"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "\": \"knowledge_search\", \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " {\"query\": \"Perplexity", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " company founding date\"}}", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + 
"metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "4521686e-4866-48a0-b676-30333fee6f3e", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "8BkjXIt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.355430+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 67 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "8BkjXIt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.355462+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "8BkjXIt4", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:33.355469+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 104 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was Perplexity the company founded?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": 
\"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " \"knowledge_search\", \"parameters\": {\"", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": 
"query\": \"Perplexity company founding date\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "Perplexity company founding date" + }, + "call_id": "56701398-4b26-4359-aef2-438255259953", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "QTbOWgfM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:26.519884+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 29 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "QTbOWgfM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:26.519949+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "QTbOWgfM", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:26.519955+00:00", + "__module__": "datetime" + }, + "trace_id": "CuKMEU31Q26a42-5", + "type": "metric", + "unit": "tokens", + "value": 39 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": 
[{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": 
\"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "The", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " NBA was created on August 3, 1949, with", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the merger of the Basketball Association of America (BAA) and", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " the National Basketball League (NBL).", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { 
+ "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "W6iEU_Dm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:37.336705+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 103 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "W6iEU_Dm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:37.336742+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 45 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "W6iEU_Dm", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:37.336750+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 148 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"CompletionMessage\", \"data\": {\"content\": \"\", \"role\": \"assistant\", \"stop_reason\": {\"__enum__\": \"StopReason\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"end_of_turn\"}, \"tool_calls\": [{\"arguments\": {\"query\": \"when was the nba created\"}, \"call_id\": \"\", \"tool_name\": \"knowledge_search\"}]}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolResponseMessage\", \"data\": {\"call_id\": \"\", \"content\": [{\"text\": \"knowledge_search tool found 3 chunks:\\nBEGIN of knowledge_search tool results.\\n\", \"type\": \"text\"}, {\"text\": \"Result 1:\\nDocument_id:nba_w\\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\\n\", \"type\": \"text\"}, {\"text\": \"Result 2:\\nDocument_id:perpl\\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\\n\\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\\n Konwinski was among the founding team at Databricks.\\n Yarats, the CTO, was an AI research scientist at Meta.\\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall 
Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"Result 3:\\nDocument_id:perpl\\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\\n\", \"type\": \"text\"}, {\"text\": \"END of knowledge_search tool results.\\n\", \"type\": \"text\"}], \"role\": \"tool\", \"tool_name\": \"knowledge_search\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": {\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "{\"", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "type\": \"function\", \"name\": \"knowledge_search\", \"parameters\":", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": " {\"query\": \"when was the nba created\"}}", + "type": "text" + }, + "event_type": { + "__enum__": 
"ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "when was the nba created" + }, + "call_id": "82c81003-40bb-4e28-bfb0-9bae122da716", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "WX35-rLp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:36.663989+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 65 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "WX35-rLp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:36.664032+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 37 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "WX35-rLp", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:36.664039+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 102 + } + ] + } + } + ], + "type": "generator" + }, + "[[\"meta-llama/Llama-3.1-8B-Instruct\", [{\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"SystemMessage\", \"data\": {\"content\": \"You are a helpful assistant\", \"role\": \"system\"}}, {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"UserMessage\", \"data\": {\"content\": \"when was the nba created?\", \"context\": null, \"role\": \"user\"}}]], {\"response_format\": null, \"sampling_params\": {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"SamplingParams\", \"data\": {\"max_tokens\": 0, \"repetition_penalty\": 1.0, \"strategy\": {\"temperature\": 0.0001, \"top_p\": 0.9, \"type\": \"top_p\"}}}, \"stream\": true, \"tool_config\": {\"__module__\": \"llama_stack.apis.inference.inference\", \"__pydantic__\": \"ToolConfig\", \"data\": 
{\"system_message_behavior\": {\"__enum__\": \"SystemMessageBehavior\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"append\"}, \"tool_choice\": {\"__enum__\": \"ToolChoice\", \"__module__\": \"llama_stack.apis.inference.inference\", \"value\": \"auto\"}, \"tool_prompt_format\": null}}, \"tool_prompt_format\": null, \"tools\": [{\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Search for information in a database.\", \"parameters\": {\"query\": {\"default\": null, \"description\": \"The query to search for. Can be a natural language sentence or keywords.\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": \"knowledge_search\"}}, {\"__module__\": \"llama_stack.models.llama.datatypes\", \"__pydantic__\": \"ToolDefinition\", \"data\": {\"description\": \"Execute code\", \"parameters\": {\"code\": {\"default\": null, \"description\": \"The code to execute\", \"param_type\": \"string\", \"required\": true}}, \"tool_name\": {\"__enum__\": \"BuiltinTool\", \"__module__\": \"llama_stack.models.llama.datatypes\", \"value\": \"code_interpreter\"}}}]}]": { + "chunks": [ + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "start" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "started" + }, + "tool_call": "", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": "{\"type\": \"function\", \"name\": \"knowledge_search\", \"parameters\":", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "in_progress" + }, + "tool_call": " {\"query\": \"when was the nba created\"}}", + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": null + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { 
+ "delta": { + "parse_status": { + "__enum__": "ToolCallParseStatus", + "__module__": "llama_stack.apis.common.content_types", + "value": "succeeded" + }, + "tool_call": { + "arguments": { + "query": "when was the nba created" + }, + "call_id": "8fcbc41f-3723-46dd-aee4-948caaa2b458", + "tool_name": "knowledge_search" + }, + "type": "tool_call" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "progress" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": null + } + }, + { + "__module__": "llama_stack.apis.inference.inference", + "__pydantic__": "ChatCompletionResponseStreamChunk", + "data": { + "event": { + "delta": { + "text": "", + "type": "text" + }, + "event_type": { + "__enum__": "ChatCompletionResponseEventType", + "__module__": "llama_stack.apis.inference.inference", + "value": "complete" + }, + "logprobs": null, + "stop_reason": { + "__enum__": "StopReason", + "__module__": "llama_stack.models.llama.datatypes", + "value": "end_of_turn" + } + }, + "metrics": [ + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "prompt_tokens", + "span_id": "vNEXImhz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:35.213589+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 27 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "completion_tokens", + "span_id": "vNEXImhz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:35.213622+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 10 + }, + { + "attributes": { + "model_id": "meta-llama/Llama-3.1-8B-Instruct", + "provider_id": "fireworks" + }, + "metric": "total_tokens", + "span_id": "vNEXImhz", + "timestamp": { + "__class__": "datetime", + "__datetime__": "2025-03-06T04:49:35.213629+00:00", + "__module__": "datetime" + }, + "trace_id": "4Y9e6Ll1RgS_fFdF", + "type": "metric", + "unit": "tokens", + "value": 37 + } + ] + } } ], "type": "generator" diff --git a/tests/integration/fixtures/recorded_responses/chat_completion.pickle b/tests/integration/fixtures/recorded_responses/chat_completion.pickle deleted file mode 100644 index eb7534e6a7222e3faf9edc4a07629989e0466697..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 888589 zcmeFa>u)64btl+TcUx+zt%qev?rvEQvAUf}iJ8pgn{=@>tXDVXDt4)gt&&_UB_ktl zW<)VEB0D0IEOxsWV=QDWU;;K^`jhv=0{ai_0=omO;n@%R=D$HRn4QsLTEJ>7WB1!) 
z{cwiD9RdlfuT)o7R>0UpR<;qoZ6gR|Xn)d({38J6*H?(gJc5gyfCLR-=Yxghw7yye z@}*QOLXyZ3MG~P?uE^E~!6J}cUgjQ1zLeDueILX=^yoJ`%)bo)nae1vIs!OnzU!MYT(GV#Yc$_taO72Ir#FL!qBlh&p zT=u=WZ^-Y*JM(1UoBK-t@oWHC1yGXv+W$@#057yhk7 z^weygJPyiz)9O$`2utlpL%rnMI(w1Fm%S14{o;f<8JsW;%e&b1K?nmmqvYh3` zGf5rSz5;+$?pykf;3JJ>U=lG z)|CxElD55$9dxYW7_q4pqSb*JiUd{x#OBA}^{?{;#x9-3m$4fJx^$ zor-D*T8XZ`ISxoGfEL16AHYY7|3xQ$>fH>+3RxX#tQ2dl?^u{u1na%Fa-)ifDLf6Q zPK3CYz_ka~iDL=yOBcg{XH{?D36`b$5wG5lUkC;`^z;z5gK)6*Ar7UY6UXr^)nAos z>TU8(=Co=9gy_JlsGH1G53Qzb0?<%Jww}z7c-ZaW6Zg342fZ4Av1@DVD_5(l>uVcV zW#nH%{aD?=A0~Co)}=4NFayk9kkcLeZM&t~UtV3?T3)-lY48l$x9e-mIJiz1r^O8c zWmD}-(?Lkg5_%Q%cj!4?6(g$UX5=2p9f{cV+^9cpiM0ESZE|boo{7dcL&yc>&KbEt z)}C2F&-Z%+r3bji)Yj|5TO z4<#B7Q`;G~oDMO$Q>tiUI<2m3R9CjQHa8#(oS=WkKkqsbGE0Dm*=EE%-x*Kl3RnDK z+>TJ+IsRj5cYqb1Q8_RKw9aWcSn&BK9zIf9STcI65{;q!EW~m3KY92InP0{Xb)nLz+y7_+7L=!>A$Am?YeEM zN71Asf=R_1DK#xqntUKOOby`gts2#1?a}SMTla5QhaFBFkt0u?wPk6lSRCx#y zPnkY40i=dNp0KOd`*<=qaYF|YxF|zSFquh*V^EFiCxPvz?S69kLAnU|Z%lr-4e1)% zFv|i+_^E7a?Sb3#RC`6jsWSo5(2=9xHnd%pqj84|HeZ{}Mm;#He-7}JmVn+E^t8zg z6=_LK#g5+&Yf|@08@rRRg4)8!;+R!bEHi$NZL-Rm=tA}oUp^%dqP~bC89MA)h>1uA zIx@1MX=#<&9W+J^p2^9^4r1Fl*>=P0Pwzf>y#LJF-F^Jg}8Rs01C&&2*LD(1s%WE+O37C^noXRw4O}cCT#1V zZ;!D#`n(1Omj1kYr=;zZjSKY7!F;XU{WHhs=H$<$srwL#c${H%K{23LV)LI2E&`MV zx*rNcI~v;q+<!h_uu!l)6gi<}r@Mz^;g5lQjtv_P7d{39Cvcz#Vf`uC zG@5X#^ms(hP{CE}UB?4_C8S1@hLTos;9}h@S}1vkm{IBllaoqzb*#;z**d}7FFSk%M(A(y9YBm?&=I3x1O@C0NnRz zkIG%7Vol9x*tzH1?rhK#u||BjwP+N^iyr(b^s(5Z5&ljTuKNu^{;m^=A0|;5@JUJ1 zm{5n(QKXe!M57m*C*MCJi988-P+Q!sEx=5@F&FTwu96-mObWi2GmzCW@q)6)?7I2u6`6q@r;C*mR)jX#u{jAQUW*pOqC>KykTLmCe{zr18MRbpJ#j2C z%8cQMa-osnbcF?G_MF^!(T0?P;}}xhBOjf{+HlCx!;XyyLTAve#O|1Q_}hJ7UQg5P z>Eg$n^@oeg*5Xn8>DYXtx?04rjK-0=^vt{lMle=Bdl33;w34NUbTlB(hJ#U^M?AC_ zMatq|R9Wgr$VgTMeHt?sa?s$3Rjtw6Y&;dxI20X6+~0J&q(<`Qc#tyVU6$X_4IW1k z_Don$w_3OLnT#h)J+qdw0+vN|4K4;d-Lco+X(p1?SetvA-+pk5HNV*F`m9LOyDUk1 zvt0HWa`?w`njmu=_JVBEU6iEDTb%#m3M$Ku!o^-M zL9aq6@pR^uwEUC{7BfqoX7K&s9>h$Ao5r?0;PJvLfhO^KWBNmkE0GiR$T>X${aeUd z>FshyCMsVe>P_Kme+DUi_Tt}u{cAws!hfG3rS7=}NTa{xpTEMNDa_-4gNqLiU*K<+ zr~TjY%3Ftj!+*Y;LI+dj{d-(K+c6BX_ zh;(k!YhTCof++vx)50;8E za0ay%XW%+;s5k>x0Yr$HD9*sMMtpGw&J*$C47@l4FV4UpBTkIywZ_mYfNBLUSBHG5 z6KYtVF&r>|B?$0So*|^pq2(zLfyng|Ja=Fl1Tz~$KAfQ+oZ|k>k@Q`;1JURzJuHN? 
z4+dO(Z~E~2GdkSQeAQ&|#N>;#lVko%YMAUp1aYzGy&+>__n>(08JPV=H1Fr!c8 z2~G$`vMrn7$Ujdaa(NwLp<$J=?u0<%w1bqC5gqDzVQb|02U$WoQxl=o6V*HLn$2{7HQN4QJj6v^ zx6E&0bUHvgkgkPnLDmvgW_}}cOp!g+TnzgN;_r>3Mb#R_6CysuKk(2yZ6-2}oVMF# zS~(-IkrPz4_6CvPbMSbuY%vFM-i9$dbzlLMWjyE{ATtwD-2<0tjs6_*^An-jy1!xg zsi^FjVv_qB%1 zdF1ok1C?9JQdJx|+36Vh+X+!x+kgwN57oB=T8b`(3Cz;x~vw*fW2T@kZ266hZL&D4m!oGESWJ>Vfm?nF%J`?c*lCRK{9pJ;&oAlj1*?n zk*S9ot{sb`V6G)YwIh=;GgRg#878!wFc0S1j3Gh^dWdG;B*U|V>87SPTb7DKXD>6E zkW+kwFfr;Y>MKX`IYK~VM+3h0) z1vdgm=S$UC9&8D0?vR|(39p!^s_7T07!C&WLu)|Ks8-%mI^Sv!P;2I%K^;RCohs7; z$&YfH?hNU@#hE((9~nJJj+BNl>k0|6$@Wl{X%OPIuA`Mh<;Yq=|3T497Kyb(k>1g& zpOMW4_vK$)>bUs2!1vgyhvzJu-kK z;qI1Iil+(=#Z}tyiE{w*olJt6IwZznPx=AfKjrNzapk zO#vj#o0?&=+b2;lu668{b}9o46H;ZcFdK)dR4R#Gmc#lv{~>6E35EMAp->dGc?AvU)NvjTf4nNAf) z&Y_{UMILp&W8}2^o{L=oMwb#sR5p2+`IkC~!yYSo87$o+C||cvWkV^_jN~({k&)1o}cvvimdxcH~XaHZJvp5etG#mNuCjebxI-D7ai~*;nChK`P z9*+3*4wTuB>L&dJMF@i90=Ft{`|XN{RQ+He3NU#^852}*XU9UukJ1|u2VSbUkbKET z$B#QcP%ofpcAD^m0mP%y@{y+gqg7}4qo(zQMj3}qMiF>M0ZLM3S=JBL&*{Ekk3*Y< zpmGbfx6JB{ne$=VjgQImd!EH)?PC|HKE&Bz|HLb^EE(~TB@ex)vh>is9J)-GYCI3K zo=-jFd;-t_qM~Spn`8*;SEthiO+(M`v;nkEWsF4uHo0TE?O}i~z-<5+ii3Q|q#ie* zNQo2W+JRR2GR$!+cTIPgkxU-F9|Ls6(m{U*bxrcPzH;8v9L+w)2Rld=^TQPbid+kv zr1cR-NzVp_fw60DR@sP2`AF5Ih+rzP2j7{OFB1jssv}~0oDX7A9fzJZ0r^mB5Xt|z{xs-eiy+~@HEV33;;YJTe?IDOwwK# zg5^1qa0IQ1+@d3B7W5IJPaKCRSL`tT6V{}_A?6B&Jg#AggnEi5I%eo2$ypf&I(Jp& zJ)%g|EaWnYeSliT5NHHw3Q3wI5(VcHO3+pX#+~ar-yJv0Gz5hqb&8%7ooc9(5xLqP zIs$7W(@E5D`nA$zG+`<;F=NarDOB2!#=zJ!QVa)6-)SGgd=d^vR5>=nLa~Tg+a?xQ zoejq7$#}#9;ds>(_Xt`I#-<=`@muW?#;j3RjcC||p3@5KC4t7EVM(>yaYJmn4MYzt zNIlTV%nbZbfGwh2LKnQ}o_?&6c?>VAQg&~grqoulGFW9K|NEm6l2Mt!n!ZPte&WBS zL}3Zi!&yi5P7TzS<$(Ym>O6t!62b#KA%(F7n{eC`Xbpx-K&vQRBGLMNB=Q5Nt_6uf zPbT7~DhJq}4xlpNzJO|mxGH^!*9zK-pnu6CK7^on(Rhf8$p*Q(*Rh9B62p50kH&m3 zJ0{d;5@x5{S?O-343Q=U94V~uJ&1H;^BXMh zt)Fwz0jjy4r1Zu69?t-PMhim9=$y?P^UG9b(T8ume-0>gZG{ z;gs29Y*DdZx&&gIv|-FfMFV$nnTR*E+~g_GBo}IB?O=Gg$ZEI;9wD4z=wS3d(i$Ak zc(mVFGBr`|yL%D$Bx*hOurRKH9r4a|u`wK)i^o z)y;QrEE(04aeKi=%EG6yVwH;LVF?PYRer>3*akUm(BL&|4W>h?o736`H!FXG#)_z2 zyCI^gRaAQBrUu2g)Fi?(NMWY~T$~kXEaqXLTO7xcXNE+(P`EB)26f<%Q*+0`MiUDh z28>>6j+SzaBy$!%&*Y18o!6hjD=AXkNd3gQ{VWlJ)K7KX9t%h6I#`6GKD;W_P*{}c z8ZgQUrNdLr3bCoWQo5I_^gzO04;2N#HLUGy{NkdLGu5vR1AD4weREN(2K6qkfY^bd zA4Jg!8ftn2tFceo19X#2UR4{kO;0=REh4;$=$Z6@8R0e0p!y2Z5yNDhNf|+Dq8dQ` zl-`F!t8Bx;frk>)L&TPvR%WAp4BgDt2Nnf8j>BATsb{p2gFcYmF{~2`w+IK<&l9Lj zsq#L`N8()@h9G*_%qQ7+&f4p#31k{i*ANhH(HW<(Wfl0~vekm#g9;?_o7ttOwt%+P z!Wqsa7z{cX!66W2g$a|ksdZ1CIfI&d2+Rm%rfTA$zGB@M3vo}oK(Rx#l>WsWXYXaa z)AF`K8g0c=b2tqQvIEqgk^)3Iz@g_sNr8iacwLn!h0yK>9&hT3Js9=v9q1Qct5&7n zPYAAn^^DpUL;kXM!Xzfpq#-C?r~^+Q>BPdDfx=lguJ^4LEIoRiZo%|RyD#a@xu}R3 zOJ%(gmosHCUZ7R!G81K=9diZc)aW?JM6b*0&Gp)r(wmL!T?Ys3WK2LL9A(T_JzdWb z7I|QnTugUeron2I&5L$~gq&|uPF?mVp~*1A)y`cxb5nMW806xdP)C~z1x}2V-PpQj zuUyBeAhZ@xZh%LJ1GsAJ_7{wukH@gKu=5c0!7aVEGz_?=>_1>BDMcBV{4tAeQu`{C zq}enDX;UA159*UU2ZD)>0Do5%lu+QKtUI!3o||1;IPQgvwAGiTdyaIQypVuk{WNSOQ6us z19+NNYf6Wil9XOshvSo_n=VMdG1GYSgKocBIpygO2wW)A3-qZDO=@UY;QEPw341@z zq{LEDh5?U`CXXnReJmd_`&jg8u`8b1CF^n6%kQqZfX0%T*PC+6UY5{WptE5$tnV@w z_NIKU+Q9kWu?lFc(-Jfcx`jTXVJak9*4z9*V=-3ur>nqtdc;4E@n;GV>u0$5;P4~- z%_3qwiguvA3l(r;8mF<+#*+1&~oyatNIdAxy#ngmw*CXygNJYZqBGSkQBc*Bi=J)wtj%j zq-=x~LfZob44IIi(!N7imt&}`@v|h-np+GI;sXLD&Plybz_pBfBosvn+=J*IBxoR6|hK1%m)CQ;%+VrmvenlWo}8z z7!thV7~kBt=T{d&6P?7NzkxVp-RXakrk0jT@F(ktH|L-J-=}~3&$#mZ`=q*|Hu6zn7at({@{bV zw~Aoyxv_f-h~)xeIf2|Rg1HNb+ zRROV#-Y6iJ3y5U}X$>c>GM@<1TtF;qs6UfpUO+4ZH@kpXc7aD+kmKE)?n_J3uP}Mmix7X z(FtaJ6Z_vX0RIOKw^J|e&KK@vXn(`{FG><=2xR2a8KH(>T~>QT?>fn9KbhWN07;R7 zhWtTv-o*fYyB74Wnra-@ 
z&Q}sh>LQ+brC_n_rm57SV$V83QP+8n@`UtW@yZ=DR%*68+3HUgE{tF{2*Ui>jm^)3ph$0{(p&j4LEh96UP_mHhW$*#OgxU9T zozXYv;83p_^y-+^ZCMc@#otvCSF9UjchE6)Gq1P3Q`IAiu9`0+fs^7swCb9@v2m?c z1_>>Hy<(AC4;;+)cmQs8&5kE&%;FS?UyBw}zU>IH5oifaDzbpZKdZ994|g~QDWyRt z7j9=q)B0)#Z6KM8GgxxJIEISxx~fY+KO!MaC@-_T)A8_wOGaLg#L4rCpq7Pda1p}- zj?)GbEe{u;nb$aC843#-8f~57HbM)tzo@d5PYE)T6+s_xdgu~iAy%!?O~z9pIM67K z&#-jmqMl|WQQ!GuA;EK=R{<61UsuxW{bF}-i2UFKjO`#a8%@P!rM4FMORNx%!Kvr ziuEuc=Pz5Os7Pe|_@eBP8TQ_BI0gy#J>g$eZ{@IGzkVHDl_;p3Oued-gr_Udsw8*A zU|H6E%sBTw$L{P!q-+4eMVXf^tE|Wxmv*dsB1>i0Yjhnu8Y3sWzOk~sxy&_p@>0vS z!}=Xav?HaX7zIr6+Qlfcxz{cxH4R_8EU>FL91xug18j)m9{CX%y&w9aW=Oal|42n= zSu#7Q?UE5j7Q516di-SN5T&H)2eV@-GeRWmRmU=BcGJU*5$Xi~2>pTv+)|&)qxwp9 zWf=lC8rUGrN{^@YvUHo1yz7%5f&u`r001li01E)Xf|k3Wd|K4lOCRG$ zn7G*7^xXDLLp;4hzGCf#;dscci;#^cmNa%EisP_vhhnLN7GCxZPO=$x++IHlg>L?e z>>1SNwc~<8J_32xz_+3G`_2${7ItyMJt&snbh}bD_dq|7q_Tk92+bv|?LidR2qQoq zH9BrPWA>-60`dpANccTA>SQls_G>i!pY$CvR+C`f7NUDYge|tIW|&VG0%9;8a)fX_ zz!!ig-R`OCFggN2uflK&4hiVI1KT=sPPu1NyA1Af_#X6IT(W6`qU}ZpJD+fW<#eiv z8;08<=`3`nJ3@w?-j|W+*!U5SU0c0G?>mp}feZd5_#sr48gQ93KzF7~C;E|)o|Km+ zwxkI$^1MY{Z;Eie?}2<>(=qE-vcZ%i?WEIvtEklmp|89W5C{4Ld=FwGtxRoY@mst@ zH4+S1nxxeNN~CCF3%y$juHmYQODbY?Kd68l4EZLimE?X!`xiFCnGHyQ|0O zy1@B(jENRDdV$?ho)Ve%i1JaTDYs1aHreN(*QLhdN8vu-n%TtEi?we|l^fS0+2cjn z77i?LjNM>B?@Kb6EL?uol|mDtZ^}V>#LXFJF%3 zqF&WZcc`M34?)eN?&9L2`t*x*tv^$3&|@qL&$50oS*Ony6Sc84Yk_5 z=yg;2(~$q5TCuKI@dKRuDWzqVq1!rOf^&gQrCm|*-8s85)=IG{-lgkJ=xyp)acPQL z7-7nA^-$vxI0NU{<{FnQcOyIMSMffj#Py=07A0{>YMn9Ja^)}+A6p9@YZtrIb9bjN z>khekftjPH554*2))Gg!0K)d$M>{*#CAEWNa|B#RQ_lLI%1rzao?8D9CJs!~P@m>l z4AiEk+k#4a?88e)J0);w9_xEVmGtdnIuOUqa*=~@=m12UVCuGD#&un+s>r;bJS3p{ zR7V7sJ{_PzMq$hPnAIwY$6c=tgO?#JxJRmni>sTo>ekn)K;?}16BwQ794D>Qz&&zc zy`&$d?LM?W(eeyIZ{es4DiBWn9{lpI$nBh@x-VdvKy@$4%W?jZIdn^W`&jK^Z(6KYrQ&%$a5Ak`e#H2< z8yMaOuUTtqwKoAmXg$qs^cI$hY%ua#?b;2oS*?QuwGznMLGM0K+i1(xpB|KyEB~Oh zd{9#Ec=@PY>hdw=7C$IG%NE8K*4K5Nq~Z0=4f_6ho`n^zUaiSXlRj>&at!v+3mB}x z4PjBKV-c$y4t*L{oh3983s`#`9HS3fPITfpdaT$x>{*HnEoI87<#R*A@mEJ8S65cB zk8wsQK?yssxhV8F$1y|T4uQM@wNaBzk=w4h!`@UyS&4?KM0G?^f~nPfAMdnVcs<^X z$Q&LULMlW?Vr%uu+c&Clc1)`V=ggQV6Rt)*fqLsj9{U|7@S3QdWLhb{E( z?`IwDbA<0w&>YNaY^CLu&L>0;c4QL~{~F)dx!?DiQ<P(8$&lkk9at8#xJCd8&u3%4M=f3*iwRGr*vUK7A5J(cu>fG0VDGBSl zAYv@{O{vX}X=^U5D?ges8!KYJJDuD&=sQNX%n^>9G*NL;G3UPeHj- zIalr*xu#FznhP6M#3asf>a&ykHmzID={4qpgQeUzYIUxoujazls`W_xPEPKdbVY4- zjiX6#NaL-(oU!J<5r6b(Px~C8C{q5l+*kbV`8v50-E&mVaNbuJ7I=)YLr-q*+qNBNx5xpIm~<)wz?gb;o+h~59Y7DO z2$bA6?HzGhBdLNeY^S5fEe#}Kd(ZFY3YIsOosqZrV4}oWVyvj4r1~!;Z zz+gxxfZKcW5`F9PW-qEg-N*ZHvKn4=7fyA<$cU?a2P;>)B7BlO9Y03qS_chdqj-u1 z2fW+DN2mj~4hPSRyZg8AKWyB&d++vjL@7auOWS!IH(3onMP+Cdjbv}RQMCl;a$ccI zUZtzkP$ujHBAMt&z7duOS}LJ6>Ku672cSC;9Epg@P8Z>0qYi?>u+fGfl`}nykg6+4 zbUH!mtO^ZVd=Lk=1-LAI+V%&a3SyL}EWs>y2MnW~ZZ+)NBc_I30lBH%6Bnbuo0DQb zV50s%EkQxZ3 zsNAahO?L;&u#nb;14R|?NRi!H#3Skk6w*h}mY0@35A>K<5eqW|7o8yRg9Gnr7oIdWz zO7xJ7zPAv(^@wY0@g&;t~fp!I*VffEPq(^Y$yt3 zuD<^1qgpCcb$h+VFw=F#RFTC>B;s45iGRK5c-v5Tnhy2sO zlF)GSyJq10Kk)Iv;s43M$?q&u3RE{GKk)79+Dp`J_W^W4UkYk5ofCJ_Zd#gDHx5KVv%)TEOiRonoLfB;lvn_c41z ze{h_C`P8XT$ha9}H$q!ohi*Dv$Z$GJY3@hpCt}7X;sz-*GA3>oBa1R}vDrIg=&ZZl z+&6YRX)M|)QU~dSDQ~AxY+`#bJ*K&D+NMzy(nK?a@wj4U?E%bzVc6L2f;oZW+P1yi zx9f%k+t3Nv^uwl(Md4~M=^)JQ;cwDTEut+4<`L{7fPM@|OdZR88^6i+a8C*kM%ALL zBllJQAr8=Bf9mj31P?WvO_hANpts?Es6XYtEoIoFiOXPNfx?8WHLSgLYq18Ts~Q5j zum?=LU-M}({Xs2g)jmGnIzH*09&Vl;9dqer>#Q+Fv_dU3I-K^EG_03fI!&!h)Os1@Zfq5ArEy)43tVz8uT`(#`?{ z<>@iY{UCfVt1C*%7bOvVZ$4U;CA>@xL$Br|?d?eY1Sosc}u**$W6Fc0?h* zN=v`c2JV;!(r-vr2>6Kf=6i98rW*Fy^gW2@Oblu4?DD@_-M!e>&sfq(SC2Ob*wp~Pn@9Q9i7Ab=!}2%vtP^G 
z!Sx9sXT&r45QoPDY*CR1N1>DiQeenPezVh=I=C**jEamk9BRM%sYr2Vbl! z!~SnE+fdH4<~TF@`qQJ_FPjz5KS5w!&jDw$HU!lfjA3j0s2 z>OUkm@w?Q2>Xi0X43U>Ot9^5(QfcAKVk+e|uB|jF%!o`#$E}G(GjMGlk^Vd)s%BNl zL`=;SvpCrvY_|M?nt9eb4rIpOQ3wpE8@J!R`+#v~v+u|8E`ZqyKUYFX;vAuph~6Z3 z!XzKphz9k6r{gSltTTF%peEYL=1Je$%Gw%{6!!wa8z7GSF=B4qV>?{7_Poxi^%LJa zK?w7ayKLQZJZzCaBAAE@{rj=s_h7NV=Pz512T&7-aGa9V+Ope6y?{tXaze_l6$EW- zOa+u}2N6!X1+a)=q~Hty^aXj$sPdyd^3LvkP7xJOK=IR~S_ zYLTrRloha+$`V-zniatB)OWeXyD6Wm-11^QD0aTZ&bNq`d@ybu<|JAYrbR9`Ugw0H zK7V_bcX8VI$$dM%zV?;%Uf<2H_nOuKY!OS_9EH2OShShRU}E^qR<>|g7by-oS+p;M zyZTQjyq(5f%^jX-TJFv+vFEq5y|J>g*(vejL+E1i{Xr@PkK*zDLX z<@r7Qf41R)RleTC{}-R1e{-r63a{=3kt8?(|MmQObuTg@F1;IfN?;40?WE84%i_+` zF6oPWi-k+Na7m{@AGFv^I(c*Ik}iC?o^{%o`|98RT+Tyt-^{NsePw;?-zw)V+zD|T484dX@}p;k zB*N-wmRPmuk|8ms7hq!WV1xsGL}7#{IMc_X8!B3@yNV(Qhx>p|`~+ZMQy^$JL02?z zcy|^84eUJ#-$2-fc?9TXC43Gef7C#D0r-tT0D>=%pIGo!*9V@n-z~VGe9ctglq@peSdxR3y+EFUfb-nQ{f`; zMhh{piQ|AT5g|94G&aTB#OokU46AKmRy6dtCh3rdLhU9vLqZtVWr{j^l|X=d;Atje zA}I_c`45GV?93jV*r37{PDkXnE4YYLtJw^}uGDs#U_5I!`I-h(ltO+d+5~xVE`q9KcbgA1JAC|XplfgO!$y@n=mAk+b4aV)L29hnr+}VR#Tu4=DB?v#q zvJrY$$9zk~x2&#b-dm3!)?}3YRmo(1@etZ>F?BtW&-x-i*M_GuHL8<0iqO^6px<+WGFt{;uq0+*P%MwRg))nin$N7!`qlw;z7_gm| z1&Id1i-(}S1`vefL6?5iEGg@XEG08|>S3mdgLCec2FVD37O#scWTY^oj!YxeaP3$e z1#>MKsvVh(nV~W_$uObafo&+dt&D!u^BpDVAuyL#puWPp*07KSb6NEL>BemnT>KeFvYmQd~ zTjUYQrm#+Tp+68#3JyRangfYf)b~5#74wv0meAj>HyhBR(8>%0ggRE0zNJn&n05DPz56?P_`7cIZ$L$Ja?98C4^XJ9?<=R?n0_9dldZv zT~FK@emwBTsxK!hg^MWN2R;n}d9aG0P;wP^piE=1_4_2pvk^t^@%43!4+tz|5>?T> zHZX`Z_%nOV4Xh@R%OEJQ8f9R^MB?WSBV9bh@7~_%w%1c;4`@miIisO}NRctt&-w4b zyF*^X{V_wZ)p)Nd8C9f==R8`cKuS_|Zwx?RB^Hqb<6fcowg5wFH3eHgW;(QkPu%0G zAM|Q=E3B=pu54CUwl}w~Vur#VN~TlA=`wg6YFp${(Jn&1+kMZ)E&!v;*$VKn6M2{U zmu80G$r(j2gM|hPlZ0*BC&Qey5;MTid=H381l@&GYS1p#5efbV{NcP@=2ZSjE9-nB=Niz}|11^T4vJS_?5ld6NoAMt01V=*gZe|{x#UzfZ z%M#fO+z#kb*Z7rd7sP>=DlSw)rELfNamOd`t?%tL!P*(wL8ay6haasv!yh%RCu}Sc zvV?k6V)9K{7P#Ey=X77N$DvL_5Eh4+0^|X;I%A@FrUp#ah0pIHAYPl3hu*_G&_O~Ws~(BFR8z2$gbT!OzD|bzI7*6=)(feR&he9Nro{F^>zHzI<0- zIqzwXrl9L5bOEBswZI9%K;_yErGl}Ap*V-T=H`Z2t4a9>p2z^E0tfG%3A$D`y(yzQ z&r}y!b65!-djx3l3LWg=hLsgutst?h{R}-oVa3Xaod+!&o7mylh*X5WxkHsZ@-ZUx z;$vY#ls{w?eVGt`Xqs$DBCuo~fs%!eV3r9q3sg4<4Uv5DDIa4Lp!x~;Ux72k`zYjb z4M8Sr=0(E&>S4ttZwQedBkX`sO)um!8LxN%J3x}W5W}|7d|8@_P9atW>dJMU?~a>g z8iK;GyQJp?wU;}f-XLUHZDZ1L6fJ5v{aR@USB@IL`FGzU7hw9bCdJ^ffC^B7(vU2p@i z1ZtV!;L3!RQ7t1)#|LjO1r+_Bx<{6N8ZRkPSc3F$)^Qo41_Ej+2MY|#T0=rzqWln= zQYJTS!f{KSQRH=pR>7{1&BXYjiw`_624hdeO;xT(er2qVkiI7rSEUc}TH04bT0rW) zAq2&X#zRz$9PUn2J;^O{Yp>WvPvr|SE`m`(^Sw~Ze8vu%)2Yg%>L|BFcPzklm} zeb+4-jEZtyeoApk>pN&Z#YJ_)&Pd9^)jMNgM_P}u^`XJdErazCVW8kL8Cc~zk8eH1 zCKb2+{KEhAi}utG1T8W(CJ{UwCThVC#e8Y9;PfDg-j_KeTO)`JR9=nstoEbf;5z#Q zwU}xJqc!HWG)vmj+`u!pv{M!&5j&HX=E~*|I1efRB9+Rnj}@^Q8KY5XjTq|+3PnMU zqD%D(4izU8P1`%1Z;H7by{#51dWvS5K9E|@HF3E4RJgLmk$J;+2F}1arokp^Yvg0& zgF68GhW9CMa%}~wbx#aDxbkEd6E`L`w=tj4Vt553EYI*dU^Ams6#7EVS3QK%0WuuQ zAXhC*nd8#llqQ*B>P@V!DS1tO5iyoZsqhueGmjY;UZt zc3YjR-IZ>46#-O5!nkqWPo;{!!R9WTK(Wz3xDYM7eqGG*t-2i!h#w?*8rp%m2 zkAz%T<(Z@|*LIhzSTvgGS>t+fhM|Me`$%hWyvGyclP^pPIEL_ylsp&D5 zm04Ag=9NlJ1o^yh!Ix!WM#EUQ4Kv7yxV5AM-FccSy7gh3g__PzU}v)e;B7g{2AuMlUr- zi;doCCUX{fy~!8lIjHQRuL`vV79~_7R!%70 z?F#7|o2n~y?S@R~>Zv^(z+De7J}Evi+GzaZqLMS^$H8e4<_OkwHS3#;S~aM5aRtPF z3H<=auM6h~R{Zn^7LwpXowWz(CYijdHfWokcG_FSyk*fd=>apsYo0;%kL>}>4GfcU zCc$iKHz2zW51rjaHAuI`-bY2sHXIyyC^0=mY^iBwHrmIiGFKm16zn+8gmOzgqm3N& zf$WZ9oy5Yy_45QOQ>wg=^1&+9h9QWTn)xL9b=F=_O(4^Fx`yBO976cy^Z@&bv`wvhDx;_}>$M&NGXj~Z zO8HPz^@??0EX2K_H>Mqi9ipZ5FXlLVFXNq-x5=(+4NTqY2Dt{`nn!_Gl;lqjN*_09 
z?H}M|@t}09KO;+=CCUS|yLYUu)D>aL+kt-JwQ5!B{REa7#Mhvzy{w&Zb;3@VGz7)x za^UGBomhC)5yqtJeX9jaj~U#zVEU!qm-OabR7A{Fz21n+nX(u!(5kUc&5n5p2?aEA z$2rC((yCO{{MIKqwJoI)8ri!J4%o>UFK1BAn5}xco*^vqz%03#?z&8a)hL@60Y3`{ zGT)?}y6jKvMZn92-7;4x<;+d(C&VDfUJhCp3H3fU51f^#hdy|y$Ag`w;}U@0j@8JGMqi*8c;DwCwyGzDo>p9?L# za@avQ$5@a>b7r?w-<#F3$%6Sy8d~86RVH5KH->|e)YnRnzQ|H~_NFMw7msA(E-o*s z8NB#x`ccl~`s(KN4>FzW>uZ@`j0jBE!}Mr_!y-S=>aj#`y`Cseb&Qe8SU;t^WXF2C zsPq}Q!QxJ_;$BH9F{Rd&4%3=W>$P<_K3Tfyg7h0RjW<8&_M4Scp8f!wlgf?J_kA!` zo;c914uDU%++pv>nUq*6$}kwYQI(IFeXRPS*cE3t%6c63^1CZ8 zLc)uX@Gn(J_%lH25d`tMh4jO}$A@@#W5~+s1`;JN#Ray(Vv& zt9q>R7g@*W|DrUc*@MGK*gt*5LDJJ}N2N3+M?P!*1LY4;ej5|AY1widyl=K~@1!D! z-#{~dexZ!6;Zi6RuPq(J$E$U@_+tVMCBGyxqCW(FbN4SqH-n7sEEofqH>FTZ$ORnlmMCHC5$o}Hpv*PaNifTg) z`)dOUK0{{f2gppyMo1yFJwU>b2?;9ggHjV|<&rsu$QJx8IUDAdPonw*qTWUNl zBa=S_2o_SPnXZT+!evW-T(-)k&;~vbGCq(E2X>nfKRwo0nCCnU%gp79oZe}D-yc0R zzdhC$%hBn`sh5bIIKra@AaZa!z$@y2Y+6}a#WnthZ`)|r(EdcF;)V5vBfb3A z#wru)*D71<*FZ9~vbkEZH|*8QioM!d-MrfAtY2GyarhSQJ^a^yia+1Rr_Wv-euoeL z(OH9)B9P}>)Z=_ohwlFuRwhLt&+k&hsnGsciHv@U0RJ}^yZs;ci`eZVc6&ZlXJe?n zF3WroyIsU?7l_ILwBk`MLv@ST?E+DmLD?EAp@`is5S4)&q7hpZn0|q%Tp%iY(Or0> zjDIRcNmn2$Yv?t?ycCGaHo)ZD03|g@w+7OwKvbr&1?Xmt!kmKB5}>98OjrCq$}y_} z>|-Qk<$(MeO#RvD+^fxt>E3`!hlfzq%y$=In75w$phWTLo?u31>(b#Jq+W zkB<q(cm-~hVbCgYqYB(8p(NNC#DEftLUh&Eu6I5z)JK5?_7A{M;4Zga-b)r*p z$0b1~+4vHy@mVf6L*hmrJ=7L=BY_Z1kJ)->=+qV8nDDxzs@yVa>p?j6G9+CfzU=$Z znG8Vg2L3QCpOhJ&?vf`%afOBgP*G}H0$w?VDM z*RvDP<$YQfX2C`D3n)z$Q`R%{8YdwgL!*Ug`*fO!3oMIShmtBw`IOMUtO)u*&jS~r zkPfTX=qBT-5R_+>Mp8ay+a)!UzE(zPePX=JQ_h(b16aawCF2R3KSM`l-LwZx%r*3P z$&fZE1lDdmxw1l7O>X3UWNi~BNihZS2Jkp~9T29oHCeiDMcAC8h9liu%!Mm$wJb7T zDhf9M+4?8DTI?F}eL9gC?Pl7NQU$yD22(`sJPyQ zrg3nJ*H2H(63 z72F=&j(?=0vn-h%)Gx^hBWYdfFgfA-v$di)E8LbW#0gHE@R0l z`%T#8ugIQ3E%|YjE4`U~b_hEQyEvQ^plPJ$oo-jE<{sGQ!w_bs>|)aPAhRdK2na{u zcW-CR{?t_f5W$~--(#ar_9AA#M#KL}-yuOY`Q&ZkwKu%iVw;-u#8e?oMROiv?+;%9 zo>b9g=rB40z@|c0aSjP`*8bE=Sx42}}1jW*g4razq8?czw#9hJd zkaU&`4h}87FC(w9@gp3&wt9)CT}QB+3mzo+AykzbP&N(3OX|`IDJ7fl^3ueXlyvn9 zac$lrt~W)v-uJ+@uGyE_o8)eTuuzW7C~GOfU685JS6&H-1N{NMH_(oh)K(V1#XD3Z z!Hu)_t}`Q>(jq?X1WFo9=I6#Zat9bLJQaaaM6}4)STg?P6>?%9*7dSUM&|0eZo4H2r?%mypup-PL1sUEq8? zb^#XF=mmC1c}is3Bg#jWrra{w+hpI0UY8n+ABFpT@MFzrV(P`(H>S#sYmw~nB5Vr> zmN&+3FrfD(8B7)~zv@b%iO@IYr=u$Fb_NhcdLBmH>8ECT@_ZSnXi`RCyF%{$%H>!t z>Q&8jhbmh65Y#;CE-o&rPrpdl`ZLuAJ;tK&EbAAOGV$4BBDWK_mRDD>-$aJq+T&)wS+tKVToGWNMagM^{u2#^$w+wau07?X{KZnr^IaZ>+CuB(F}_4Yk_5=yg;2 z)128ttYckYWHdNBH%cdSK%!%>-$hP%@yk=CfJzGEej58=;ZU{UT%#;RK6C>epEMv- zC+kYj=t51Xv$gtQ<2vXVLtkY zFmYg-(lLbdCff>Z?}(ai3ro;rA6~*FZ&}|X60GuI{4vB{R#VIeMw`H~)Pfn;b+M{) zc1#=+aLTA70!yC`&>*92lJzmGRT7W8UK<84%og4wRl~*AOTs4y>2-qcm`Vn+dGXp&y(=&|5UyajVaqNi@oVZRm`QA;`Dv?uvsI7VQvLCf^N9 z#KviuHa5}Bz!}()d(4-?QuhONSA3=xjG<;&p``CXKAt*tiJK)s@D_aBurWLUe-5Zv z@LvM-J+CPEy?$O8YFy%O^K*iEsaxm!D%D9H^C9>;!uEdD=!> zuKx6(q+Iz2rR9T?a>vU@b!lF{gB33yZ`ZTOMODHZ3u}KS#oe-iLoj8sjD-Ifamf}K7nR06R z+>mhm)se{6l@;t`oDoV;!XEN*ImaWK-m}YwoZ&RZ&)=p(;@wc|BG; zb$Z3!&_3P?#`j~sxd${@80e4+k&)P1ee(8=YMfn@iSy!0v(p~HyxnR3dA#6N#m>Q| zs5->1(QRYb(d+T+6SVurJ$!M3rW#I2oOziQxh^$0K~Q>~_|^MtZfT#gFm8 z;4G@sGn}J#TBZrjMNCMdP?%wm9aSS5qw&y04K??Sx50aRh%#l|TdkK`1-+3I5V7(w0c_M>PN?$l~?n;kC^qg)^?+Yr;zbs|0c z10K~6M>yaVplQLNhUlL{>kJ{qaKZGPM$5tS>-s@`_p$t-fv#`p%T;=#yv~kQQXfj! 
z6(sa@@xXzOajf|((W{>l8TuBMa8HgC%kq?m^TK-RXac?#cXyny9k?T!5cSey#||Jr z+&sFTY5=`nn8vn6CTObFDjrI9lJ#GdJ{~*4X$c7#c@i|G{75O>?2?vOt9&n>_d;zV;Gzn*;6l>-&E05sBZ9f!rx}`)2Ei}JtxmSl?~9wM2wdakOOfWS&~ zR+#%i*?pPQ%;v&I)#CAwJ~_*M>$Y&*n;79Voki&E*1*AtCF`_V1>?iH+_&s)IWWEO z_%QeF_#Uh1EA(*Eh#kcIO*;<12g5frdhW zfNizIW3ZXRqs>XKj$sg%;70G@CaFA87{DEGa4vcX&V4MZ@jX34xQG z_v|vuHgY|l%v2d0-hdCn2^e$i1UOxqyu>3cd9xSQ=V9%-A}pdJ@?(W~d|FCiA?!DXB5e%qJJ#qF=R)bGbzsuY+Ns?|6Aeg0nv3%tiV6PX}wD4<*{BDhjzdRRw0|6ZncjB%S)L8(#L@aYrvT{ zSJzn&^UjHf5t}+heq{zsksu?W60XP7UaJZw~9TAMoVVkMVq>Z?kMW|B(oUP1 z)%=gPZktuNjm`I+d#^tbh!@GsKx8_zii`mKai8a&$9KMiSO^d|5fCxx*_qEUy8+L6 z8&pZJGdX4-l$Wb zqRWH-SA4(6zp_De{;un*{%`p4;okqwztdPbpDj-w52CXf>a%1XENCTeV*u zY-DuB>FnI^9US(K_STN}I?M6O@Gv^sj2EW9(LV|Ys0P2#^J_b`cra|WmKGP*msV?? z(bK!}-bYj4{t8U$E)lbVUxltwLgExw6WKJJ7dX?a06apg1xgU9T&?S{K{_K5fxs6f z2{uG36G?46byRU=o3tEyhB}T1#krGpXiet^`JO0eLmPNUY5CMIu0Ml-Y6l_w^fA~$ z3}6@XNe%n}Fj_i*h(dF_U06_nqMF2SIOcM&rsU&2(uj1& zkBAbcc%J&{LOPV$8M67I&eK7M&t%Ps9FWNb=1HuKsqfKyYM+ZtlP;yMw-|ZQ|Na%5~hA7Z3qh&r7it!1kCF?CdLp-7{MUB zz``^VJqM`GsqfiVv)P=P;53?|na;ywZU)T{Kq!IzP<11AqMU4y zxvd$dUMWBE)bH%2oFI~xpKxZUU7O(rQ{SgMIexx#obA%(aew zF!giviLARMYLTA>9k`Iymdxc@p^K^S?(H-{lkaLKzB1#0b5jWqwH`}5MncjWn)*I& zr$$1xG&e(aXhyrD;NupHL8zQ2$bO9%xdvTCUbcgrLV;E3*eK$m+PF?Fx^yVS5R}f# zq)vSom*LtO^HnPhS=L!S=5f5*PW|%SNWr;LjDCMCjB$Z5fiwDx|NA$;7MO-R$dZLW z75?Nrq5(PY@F&A8Cai%l2!Ij3y2?JU6Sphk_*ANsQ#Au5$ec4p9A7~JR2=`3Fcu!k z7YyiSVK!&SXsp+dfQTBE#uwhidrx%BPH+kD__zgkp6@wxJf7~^y^P;9d&<(Kbe>mo zqR-EG)Y$!rKDF2LSn+XCfp+}~E+K^@I>#xr>nC;>eJkWOA1pg6{vhN>b7 znTTkjb#aCv$QEP`5M|4Upuso;)x;adEv@kfov>u`c7l3q@(^t(j6dWoQz0uIa}CN! z$X#Dl@$Ad;x_G+qpy}#)#^zCR*9$7Ufa1NGD)}0U8V>R(4u%aCUIpg`et9gR)+$&o z8bKE{FQE_2ROYvYO@qira1jyt_gUnGd0hdVIbl{!c;q1-ZG-ZMG!aK2lK{CBW=n;w zCHKOLT~K1-HrF9nMh<6LfUB+vkICyOY2PGp)g;`~xz@;+jj8&}EFw(sUjHXiQT(fa zWAE?z&)=l>5Pz5NzH;o>8SQ*^XTq^+Jjf_6G=%u^+*WmX#GeGc>&5gwXTiJ2s;__d zI(@x>fp~ea+gTlcU$m#06KC=S+}O41*Y9nd#E@O94*Y20Og!1O>dOZojRkDxBw{11 z>k4N+`wN`qvpB82(E#*nU{^!vpC|duwd!cK(|xA8@?0VJ2@4FsXOo;HGmpvJXF(8HW-)u}bH1B^nMXp)eJ4f~*nY#O6l)?v0( zd+c}Xs62bE`glk7*4(q%`=&Ru&%rNg`{g-gmXO2%;9=)QS*BJLY?@tWOq7y+tvZY% zg{G$`OoExA7grTwCt|C;$o<@@gJt|$^>1;0ZEj9FLM;SxCBIgU1Oh>x`Q}=6?*qN( zatn4I1i|6H6V_(~$RJK)?8bs#Ac(9+u~>Qy=<&pS5oO^u{~yyn5GOTU%LMTiB@Bs1#6H*;v`E zY;3GJmBo$KrOv9;U9s138s5OQd;b-=FnjOfr3eV0E?E-jzmK;pfD3^`?2MycV-b*>gL*RHnI#ghN$ z^rF)(`PYemUc$V+x(My&Zx<2T*KzVPq45sKgh{Nv3(~{f$#~g! 
z#j&tE$7+L?K<{4HT@N5&Ef;8jQca!~Wrg6_Ro;G92<(r#$uXbkx z8L-rWwL9j)O6|GiLhThYuhVVPj-lL(^jE33MKxc!TMB?)7NA#PK?*EL!pe1>iFT$E z3z93Kljr^^_;tpCIA03r<7FU($Z8v1?e`}*+&_L{W&7vqKzYh~(tdQ0uc*Ucc9`u>6DeX?)h zV$3VZahUopeteR7#bjZe!fSGhhi~e8`ZFNq6v5+Zye6lyhMe#ihB23-F$t#VRuO9bC!r zp2!(y=#7N&-~@1;dfC+XYD+ke0Jv1ai>!}ULuw}=p?Pg*`l+~94E}JU6Ipc+9#a+V zG1#A}pRqqvGnF{G^)%&>&$2oVZWa+Lm6fKK>D2eIewH6zb@vPm@zi(kUW#&7Q+fF~ z0cUdR`?+bI9?X!X$gd~HGp4?4t46X+h_|HCp;-u*ELM2xyLF4}F%1Zv8V>^Q*<6{$ z2~U0hHZu*Mgl0CKN~2b$pVQy9stT>0!?BbJ#2W^qsqfmyZ!7J5>U)~;=j92A#yFO& z^BIh$zOTPM?sdM}7GE8L_XpoT1;M*m*|=S~@1vUKs7;2VXHSkI5Gv$p2_pFn-zNM; zEO%ss-rKhNjyLR%JY*gjnxhLb8xR(Y5^8ay6}T)@*cb+=`;9|4?l%TbY?FHl%Pq%o@E8RJLliTZ zxE^fFbAL5*Sv=9Y<9Ka%go03^bj%X708@HO zJri#_tN6@3%9N=FUf{hgRShjS4o~|Ih5$7MNHWABnJ^;hC}dl64^>`4(%*5q_Q;Ft zlArVvC77NczdSDYf4%?h{&&Vp;Pij%VgGx%;y3;8_kYm;p(%SK(Rkci!hbiC9EpBO z7pf^g?9Zj)U)=gb^+kH+_w<$D&Ey+ReCY~an$7ITOaBW0-RS>yhGqf3{MA?W>Q_Jd zq7)wuozj+7>W=(2ncqtDR*5TupBFzMaOR^gCzN-tktWy5lbdHsKQ_3rY@@buD~d;IEO>VY_Z^=*#B z@hIKI|Iq)({y*t`_t*V2S88n0XY!*U3f^SZ|MF52=%Vih7v;bE&NoF*npWuX1=NCU z_EToLvfN~{UUBYa&e>V6*tY=)y+G>{9l5f&u)q?Zjc4Uq5DyGdiOFdwKQLPNFz_8{ z%H`R}25+cnz&6TOlEmwY{^XH-P=;;}o{8R3eJ2<`GT%MXAC`Fp)n`dZHpH#uLIYbE z)B>lzP(^*NMW&kJJN&kZ*A46!4JkYw)fcFz48c0WgR3!gP@y?KsxMX-=2Wv{pj!rb zq^JpOsHCi+ci<7=$INY$U&`u>egJj6ep5b|pQ}_vF(-vSTIi#RR*sdEXvf!HAI*Xe zl@dIL_B>q?ev{LWs_=wY&z@l>(~zb-`ltWnPNsJDm(#p2?Qs5|et*gi=O;)XY4^qc zI&@l!xYdDQ118~NUlpfmk5D)aj*UAXeo8a(6BxIciz>E(oD*qD>}x?_LL79czKyXdn>AOzXO4?z4+U??~ z4XvLAj%WRAd+6Cd@8Crrc|ptetnF?W_eaTYk`+ndBy`$vbpE*95cF~H5&LNzTcE?=%z`Faf3skDUX@ zM}9ZChrRRxk3x)WJL&Aa^(gRM{QilHat@w_ zcgJxuKPNBN!@TV|FRUl+KKwVIo2GFUL5<7I5*N+SAF;#R(CUt)K>1EEu(mxH^+40+ z-m}{dTul4cLl*!H8><_u&x_Fg^cuaQZjU0@NAX=tK?1h>1B~dlSGjBVJ@}OHHonx1 z^GA-qeIL}kO5Smu0zLc)53onL7K<1Cbt1Pz$;ejTw;qF;ON43mG55CbKQ>R{)`?I6 zkb82Wy0N&qu(`Q}&qDjKp?5gG@kgHbavuJKWc$A19(&_0Kfvr%K2P@eTNVNTof=^| zR{nhCfI0fHeN?%P%`P9wUmpjd`#I8#4U*NDG% zPW+`u=RD3;>=y(b;N5k5kYXMfk=1P-lAk3j8oO_4VPOR|MSSbib{s&y;)5laL&+tf zgM$MLUMjIl^Tegv^-#6d!h^Xo-8cf-wum@Y6cqdY0P7dOv_tHExu9zgTr>lh+ry!{ z%toWy^g;V@w;niNFy#J>0+G4S3&>U1@%b3ogQ7nSF#4eOiX1IES5VdOfcLbVqX477 zbk~YdAre(pVb2}fAw1oK0j|ffg2ILA^E`JSQJGP1fUSj|apDf#5Cs4|=-sx*sX{Lq zw6R(v7qyM)PWPP`D0_SWHcHxp^r7pP#)*)wsu|9$uAYaWj}w{Cz>q=~Y9qTc8SYO; z!y#4y+Jkz^e$1=b&4mSflasP4izC0m*SWtpmzFQG!yQMlYNjXTohKs~wKR8N!E#z; za?57o=d5RZ>GKKacn50=ixnR7Bfld@`Tak~Y3{@o3{xdJ$SduL&#&N&8dQRAg_Bq5 zYGFFQ!#}n|)Fkg%pZS<8Oc-V$Z79S(j|(hZ%&j%y(mU8@7-MT2>&j}&8Q)C~??5X8 zx{L4}=yTpefnnT+`#8NIEEwox7zvr%4tKsU=HT3 zJ{AG`3$X)%mKR}^KIzl|IH}g_JlYH;P_>H7Z(#3Xpg`q0!lJ+mu8zr%wS}dX#mx(e z51e}#lL#)bjs>Du{QU0Ps)gb9C=I2)HEBWl#V}xK8i!~vz&y@ply|FIl_rc?28V}- zU>zOB*p4Dk(yl?SKx()?1|+VGhLyF|O3U*rZF@)&1YyJWpmb{pIfIf26*i$l!BtsU ztSqgJiy4t|kCI8IQ6N*Tx`lN&7~$Z=zo+&MZ?JMCToiz=ER;W;1J2KZ8@1M8az>e_i)8m3B2`0)`?o1yKopu75+iymh)3pQ|M zjE24M2Z!i3&%F|iJI}NGRJI}qzhj9(^7NrRsN)@HK4zfOb70}X^9wVm@Amp0{;w*g zLCo3%9;6h}n5t;VV@}0^%{H%EH*VZe2jAVm$5Sxzi!e6Sn%)WM)*TF!y?oz?&M<;p zx9-6Lg>~iZV-<8ch2veuLd$6HABwgli;u#sBk$_)UC@ct{t42{E2t0+<%%tW>XB~( z_PK5XqN(U}=Io)bgohXe9rhqjQ#XKa<-w0YS6WBwgY`%-2o<~X!0t(99~v&OVIZk!p$`$tq#@J7 zWJtujfU<+R7rF39FMO!Sk0z zkmcG0SuW$Z%b{MzT&)n{JfRGQ7Tl>Ro&e|`<9=i?N}_0zLMKO0SuSj-uo+bcpV6$` zyQs&?6S$qVTLQ1bWl89QL9rA`BeA?|g`}AfKKONXJB3GnIiZ15A{S+Af@wr<#?7#sQ;V5*#XocGDQWhkAjQ*X^p;E+q z2)^Zgp5ucMAF3=1iA3b@GPhgApd`I5~L*nomyJVO{i zI65IlMo7J~;bK*9p_DOiuq^?;BnwI@r^aGL6DW>Q4^Nr}&nHZn7`qxK7IIuwTR*dPjdZ}4 zf#a+rw4T}fxE#;EEw)|n#BCF}KqtS{49bZoh+L6f&rR;wa2Kduo@ue(~M4ICxXeo{-PpBu#)1ph#3sP(J@crUan2 zICFu+)xMx0Jbv{r5dHP{qvLU?{;Pip*G1^IqvMCKz9U}a;~a4R)r@>7md|nj2d}<^ 
zP;YD>=a58uH*TZS$G2lRW{>w)@N)*#FTX30@2lTH+#Yk);=6t`vyYEcu>W2Sx9=_B zFLh6X7T)_w@>#z4!Tb39p?sTbw>+r7B9F>t0I0@(SC#+|^5eZbctZcitAB;L1Jn|s z_;?f@XVL@q?&1QuO0{WkOe*EZ} znw01HT7DAIffjhJ`m&JL9ZV|LC0gi==Hkef@w-;NTg*~nbHAt+2+Oj{*#aF$(pOE1 zeXY8;G*)sjQ@Q(;GJW&cs(bpM2bS@tdIeO588*bf5wij{oBO-+Hs^ee^6`Nb9?x9(B5AV$O`oXZ>YQ&r6?2mzVIbDKjrEh4Rb(Li;*@ z%jBJsDW3NqX-)hm{`H^nmo~6uBc%zUXBa1$Bu!;HzFq6&m&$(3#=XRT6sz_OQ#WHG zzD^5~Qri2iBJ_j23ZM~yJ+A^B(FnQ?@)p1;llhH4{KS-x>f-3R18H`3we!w)|K>dI z5_c&=KMaAZ%uvAjCEqpABg6TH>q{R}_1U*n4Y3IQeDbJq@BV`iZOu9TGk3FF7tGGwf*+IMM( zzlXoZqr`h>Mp=LG=2r5T9Y{UHf`9dGlDMWA)=b1YW5$m+@a_5;be#IER=z6`10)#$ zd!~U+q%Q>Yj7P2p*iC&eYQ~W-xeiEwP5k+ix+aR1N(p>Xv0q$+X{iE|0fb@1bUOiX zC8`C{Tu0K0%q3GlLGMc^w91jrj+zr$X|e*DjyUyw`2)1S?Tvt;p~jAkA0S`t7N`oQ zzU5N+L1yBG;D3)S`vs#%%(SyDDqMsG4Qym>=Sofyd~+^HH#B5P1VBGi`1xoHJ}2TUTk zNRzfBZ7$+vk#%1wxOTKJ^di@W@obiv+8~VpLPe$i!7Z5eo|)9b*<=v%1N1iXUnaG| z1Ua7V0GtbV8kiH}`=I+Y2y%RimJ+#JKXJPR))Y++G7A)b7`YF(Km6$aL)shTT~^ma z7Y5B#RNeJw8Pz%ewFvs)25S+IzN^;^un1dAtE=thU0=6(%fj|2STd6Jgp6}K66dWg za%gWg|1Ib*^xER*5i*UdU;~O9`nRS@f06VOXiz}_9uqgNGOHP@@MzZu?@o+#9WOwf zNgj@eAoUx>8;AB0)5cX!35XHeZuiJ)HY1#vZKv6s$IpCCgSjXXf2Wp)@4?R+H=Du% zf;X6F?)t)@18s=OTuHN~As6ip2#wIN4tO50X%Ou^8gCvmEwbYUt}C8?XpL%H?z<>H z^3ac}^(j;U_5@A#Jf@ye>NcAtw6>_6KbL;kffVxRSp3#UIgv^tu1SK6!!yob~EW#>4=(z zVA8I#>tW&z$h?AWPzA}tz+N_Ysoxa)E!?xAd704R@&ovU$FC`uSU9mpdV|UXKCQBx zt(Q?VBviHJAys*I3BF?1`2j0S9s4a^zGd|}ze{eZCqK=n->NlPFYO^{Udnu)uFraj zKi39FvNfvX7s{fogR&gyHrWT{7K~awS3EZy-6}{)XVF< z2ANfu6Gx^KYPz;8&VuJ2dRc0WSis$%H*4ZLOPzBcea*^`H@uUsJ?<{&jW?)STi)~n7LNOxHJ zDzgdaGm=x0BA}~K2P4p+M2NXT@T-|Y$r}y>o0M#o>aPC;{tuAKI=f&J$8qO_69~L} zXuC+IWyO|~+@Qw?Cj=OyIHDL2$a-Oq!H>H>*i(3Xj5P`$;^~3pL)jiq5sH1Lu|k+y zX52n;NonHkg3V`3<|TJ#>s>og7gf^@Th;gN1p5w1vYfC95rWym&`>@m17o#tzN!Kx zR{8uF=%?PnBX;!AHs>&My2pt8q4zL%Bdkeu&zKy%0-bY1tBFitEEE8dWMZRq#a~qI zDQUH52|F#T_AIUdZ<$ehupt{>5K_AUg%_0PhPbeB*REebtkMq5gneQdFMjr%^ zx6iv5sn6URJ=Dg*`k~7ibZW@#q-7?owN{glNxN-_FWiGFxXeIm8`YK;7uKo^n`;}Z z*rCve((P1nNSX!~!&ug%UXDUGvQi^#8yfgeg9D!NZ#}OpPHJO;?E;AE3 z1ujih-UCAt@7{ti+fu`%@8HhE!f}xgpVj?t%KWM?)a>4BqUj=x>FR@`v&3I{c40a2 zqYB7&1|aTg?GUGs8+Nz!x7ZI)e|IsvZ`nO=b~Zw-8TK&$6ud)z{gv z;D|$-MeqQF{L8@;v9m_PGne)vy?@&mzdcxh4`~v}laiBJ@f8>qH@)d&yh9Byr;h6= z8B&#JolNtIdlXF^?MgMxsS82BnmU6Px1b?V_n$pzV7CR68V&^{{X1sZKr}0LinVnIcf1tQWJ7G%uJ?T_=N7-2(kWjb&$=BvL?M zjIGYI6gw4#xQ3uysI*{+b+Cwdf>X2t9>CfZq+>Wm4p64!_f6Zbo*8pjG3Q{Ba;NC@)gdc z5TwnnoQz2eLAxbO3SAQJ2c;O`#07JgM+)2+77&{0M?5Bz6HjO$6y0o_9&YRN+XW|`!)DSV8eXVpET_CYS1*bK(2Cl7g z;xfvdT7^m)QW+TeW)%az)OX0+IT8W_G&xbiQl8l+4p)qiC^2ZB%tx{y9KU+#?nA0U z*_0!IzttXN&KhL~DDALedp*+T&fz#vEuq-$xDkx*2(4w8p+V}2#sG)G3y|TdPM`Ch z2l}=~?l!zghTt}Me$_ES;VTF|LD*j%5-w=U>BUC+To_CI&`{ml|%nPRzwr0FuK=l;#6;Rpx;PcIXEHo zx^474@Fd$I%v36cZc7v%S<*Y7-u+Zxbys}{o=niTV6|r?m%aiYrjP1IouNDjpWXwz z9r>qmQerRDWUwAVIDz=pvL+%>20#{tQpmFkn@orFHzP@O*R!hL*y);CubCe?wfAuPVS$RHk5Xrd z9fqTX!pV5nq-XkdG>BBU#}z1?4C?&Wtx2RQD69LUmZCE~l644oly;)l;9zUjRyNj_ z);f!u9jNmg-OZ);`pV|Y;$pYeS?w-#yP#PG!K1yjT2qgXV6$iJ6d*vA5YDJQJl+%S z1=O6NtTAc{4^*xh(OzrNy91?-^WFfwFr?0TCkvvN?T!rpIJ*#9Wuk5l{UCo z`5Tl+a;dg{Tb8PRZ+7QqCWZ9G!ak(@2Xug&a~^XQo;fAjg~Anz9RwOzy?0O`gH8ZU z7`@b-E#(+V_pB4bBW5q|Y0?1zr_O3mi&x z4G2RvLb;v=1PgdHO)3;J~qc<(9ffD>)bgI&rX8M8lTF z!SfS!1srMe4xSGzR2zyQUTXG}7@pj2ri& zO&BXZV*=V>&yDfw2Tsh}DZ%j;3^1Vi*d~Y`K;UF`YCTe??x3cg0y6`-t(tnML6IyP z;dgMjE3)L^{zN9zjyhl*Z$+#^ylINVz7|)TaiA>GUIn`sq}IdwUn ziU}XqGBpq7%+2UEqL7OZN`*Gv&$%l_Nx7M5*Bq5whzde#k)9fUB)V{oup!D1oVK)M zKIei$Np;$^(xc>z7W)HAjuMo~lYhlWHz|FUO44ka2WeHG2Q9pEupr1Vg@>XId2{Md zCk+xcEWs;L`Jxw{joX z7uQaHA=|mWyp;RKEP<2lFeBRFw8+ 
zl1i*88KxzjmTSv!eDdjL2(oX?b>4iT`)^K8x%)E~N0ZgtN7$b#HKZ#lONf67wjXy= zs;MZ&fT5%D5JhUo@)l#qD*0Vl#d$+n&%^fgTE!neyi48bd=LvN-514pq4y@K_RvvM zy2UzK%mD{ELg9`9qO}q;YqT6%LP1!+`9!|Z3Ej8#$5n_D#Lke)uYP!2Y>p=&Owgoq zY>k=^o<}3_lZFSLcD^mKT*^^m&eCJ_%c6y^qRyZgO>k7R@G|^_sF~0ku>uH_38Yj6 znyI7hLVbE6^2cv*CePmA{l+`*q>PNy35Ne*c0jcU!AR&oQc;aI*a;w{in8t)>qr!@ z=?5$S)D)C~J2Qj+WDg-V$PwM4W802lt%?1poInQu`O~XKV_zMj_t9K|=tY1;9~0#; z@KBu|Dj59xFg_rV;d@|Yt3HdLGt99#U%o7om`P`efjmW-Q=vfg4l!3m5Myc-khnnf z3dsE)j_(4|3x`61=q(Vv*8Kv}3nwoDqXnY3K=c-fUK7wO5WOmxC~=H40KN4B(ffEC z@hyWnDG6I)Hy#=B-AG!SH zFQ-3tSs;1~MDMo79;R^e0?{icTY=~;5WNb&S0H)|L~nuUMOoki(OV#Tm90}CdJ9A^ z(ew&dTOfK(W@>@x9k+tVf1iG<_!5ZTd6bL;eU8IKxAQ?qrTcGy*?U7oTzxEFQ zygV*{gHh41zV$`vYH_s}sBW+j9}S(-mQ^C3C#5pvUWx2z{A@f&gWpYkkKRvn2+)Tw zO2Pok-e~qCBVUnxF!g=;Z&L2w(#w}q(cHhB`%9FXE%*ObvbpcDcnSaN|9$p578rf? z?#g0!v9qw+sX41%Wk| zMM4JNCt8wQ4gz`7u?2KTb~x1 zfm?+f_yvJ=L13+6xnyk`Re)B{}*SZ`Scf%SMuU;RG) zf&BvRUclW8xO+ihZCD+t5*Gy4Na8C9tW!Gqg1}k|P80;z1>C(Lu>OTk?JeN$1>C)W zyVKuHdj5jI`s-BIsero&T@+*q{e+mBRD)6d*;|FXUoKN%3h`QJVHIazg4-NL;)1|~ zj|JtlBPX9D5aSyr0AOGnZl^wb^Rv(H-@QemPkhlJKDRyzWD8E~j-wnZLUzFd?R_!gy?_Re2E-Sj;)kPDaYHf`x zBC2o~j@>L$2zBQ&bwelo$k^UfqKT4|JEuvu?jfg5I&MR3MM%AXY>bklno?1qc zVD%i9J_MtlJ@Qa+P**P$u04UiC9g#f8J)M|Hm0PHGKK0I0e|V*fHy4E@7o)pXsA@c z#f%G6pM!$QF7DD5&QTLl>LE%wH_oA|NRH`XuQ=rxVJS(P7fi|Yw^0j=uje41S*UAX zNM%v{Xi)P&o~ZgL$F>^(q@O17m%$z(pE^P5N$8(^VQkB9c4 zh$)j3x0>mb8+^2NGg-;`MOaNQ8>5=)6uu$#`LdOm z*(#PORWo zr~%Cqo2odU)YbHkFw`eECiaIeV|&AT6iPW+^h#BCk@;hN&is^#48TRBXoTWNS+T}Z zzjf;tO2*(p<#g*+)k=7_@Vv^I#AG23h#lvCjCwlTF$;tstE#Lft*i?2&23q#Qd`;e z8(oK`mYk@*vaq=}PsGspM=jTm>i4js9eKKCl~p;vH-3yN2KdI0$!xvO6eW(ix860+8jU=TA-!5lXuLc785pN65;0 z-MLIE@9AmA40Xa_h;cy&?y8^4!}>yXVIB)?=-F*2H=ovJ%b_lxb1)ZW(~Gj{McMSC zYa1Xt&kw zJMH~pK#f}GJVP~uanp0#6BY5Sg?z)>j-t_kri&DVPc>;+BB};u-$qV3l<;Bbh~0qP zJ)_AvZm%Cl^Hkt&h|Qpk*Nz)H?btg)8*E7ZzB7QH1uIU|mpDmR0M z%w-?aRCVbixE|mOz@2Uv#W|T%LPx;BMHxao1yh0=ccIOStP#4*=36p$d=JJgd9vw( zDyxnGc3#lFV(H7oQlvQ~gN4C#hv4Pcb{Pv%8$ZHbK!%Q0209FS%H_j&zEF9$RrMh#x?_5?BuO z2LwKriS#mK%Hq4kLNzP6ahmOlJ$2I7%r5bhPN1~2L#G=zMzQN*x(34_jIEv%H|&l* zjGa&$$5>AIENPJb)OdC7Ids&(`d0;kQ9VI?P8b5U#?>N0-FlR&4B9T{M)|O$)99RD z;K~k%!(r`M=@|TI-O1?R^So|oo^O2=J zp^KRpYu}h^Wzve&#*1MVdX_(eH5k(Sl1?TcF5l`#p^K0=E))(Vz z;$dvs|?xJabnx`DE>mH zyVZp6&bcXbtpuA&_Py1F+-4wIT$&;lhS)Nco@zcq$8!!)={0-Y4ehvJ#ru>H*N2Ka zl*A>eWyW~V9c=DM9aUgTiS5EVJ<~e9tUF|!OL{y`p|=l(Q@J%K=i|T`1mO|Y7o57- z=rQ!0uF@C!2%#Th17O+j`a4PwX*+Uiw}ZiePZ4ivIAVo%S$hfWKwM3^1GQ_n@%s_w zU5q0;nzvdACcsrbu#c3x9ET5&%g`ObSEziOcy&I4A#P4;55c|#9Uuze;Zj~vbHvN~ zHtm46LS+JPaynl}vhhS6K|Lu9U*1{&=du%j&f?yCP;sD|a_b@9WLu%_?{nNXFu+eZ zhUviTYh*h6_5mG;Bh0j%rf@d1CJ(l53u;`~#i{C@d@seP3K5uq2o*BQr@W6@1uxKS z*Kb4Nh1$Yvq-pqYagAEt@>11$7{vSv(@p0%OJ{lRz60$gdnk^%Epc^_vvr71un>WS5iZwETmdSTR6TvFAw6ckALiE|Od%%}LQx8H6SF$3x z?4?2J*r9xSChC%O%l3wS+k%f9I)?9r7&8ncy3q@Q4rT|hbfLU^i7M+s#JU*7e&@0f5iN^8`xJ3er7GH z(|*zp!NrgR2q5YpT5VmGYOr>9)kz?GD<7xXomH;<-P!rwS>=wGAC*g8e$2SVcW0mH zAI5p8FY7i*$Lnh=^!+oQg%hq`t;tK%F>Wk!4z@80n5@u^piyZ7P>u!x6|2r1x`+d; zJqiyn1}!H(bR0cbBCD{Rxm>6zbDdfN4HC}33W=;PEWnO&hIoQn>=AF5IF6YDcVPD% zjDAgwBDY<02fY&wWg{A@5!IpJW3w~SE9yKuc&FXM>+$9uVjqx$SQWBH5^eSAk8f8k z1DVg^ylAFuwLP5GTg`u)$l9uiaY)$e09K>hhSkvn)ljec6f5fX1N`EIO*NfZapq-K zq=?iGqTZUYW$cFOioFuEU;KQEz^Z&Nte|z(;yZVV5aCiBvC`{eHsiM zC+K!FTqZmCNbJOJS4#+Ky3$*)HJ}OOJfl5D`AFG!UhsZGJio#jzR!gwCc8c=ofkIA zti=e!YFFnZ7HxdU;PC-=K1$!+hOp{E&Zw_Bu?QNPmUgkKMN>}V!(7GOz!jBMzU$Mh zz!dsDh!6EpJ-~wiv1*>yLB|DH;G;*MK@>`?IvNed>}s~UupVQG+nfD39!6WW+J)7Q zOQ4(!X6k~OdUi|D%#RrPz6G`*8CnY%sDb_%1NPjl|$08p)}KY zts?j@+K6rWj2+`(^=!X4f^3Ey@ey*yj)nk&Q|K@VJ5d$S)mEr7+xZ%Sh14GgzB5O_ 
z^>Y%}exnJgcY>+l5dKk?C-?>kYWG&mU$5p90(_nxJApQ$lo`uITa_83B#xY^@6EE( zHnwJF7OR$ZABGr@*Q8f+Qn|3pI?mAXJ6P1N6HR@m{-8Po)!n)q9Qt%l%AqLE>#1+~ z>{8;CrVbvPTIki8WpwB>ZKgE6sqe_2=0H}$u;Re<$ZVLY@4^qE@`$!V%NEOCX;08w zv9qVX@i(kV7I+#%sLrfZr@j+&7O-rzyit|-g!#uHN)OItDE;yDpBjHS<{wG@6jMJ7 zf0BAqWGeEGl-$?7s(1QQ(`?C*6t-Uq;_;7EXH}}&EYWQ6~l*g(|({FQSYQA4C z%G4mFm!||kEC2yd#OmScg7jk2MSTis4^rPoM`zU`$`<(FT*iZ0&7wKZc+g$mTwC7t z@2IKVvN)+pX%)CTS^r_)q&{V0KojdiG#Oke%G5MrwZgB{Y-Y!@C{xoe%G59}$v+W% zO>w;>fz*=5%<%Kvf@ufdXyDgN9Yh|3-pD!iLMeb$FGYaUIK5$Pt`qp!cr-tSV-wel zSR!7D(69;D0xso#?t}ndvN@+#g^{qPDK%8}sO&5^5r3j8<(k5^4M>^HAW&BDsYN;jkSKujvAhxSW3 z8E8lSj6b{nJ)$Ib6!*ReI0Sc6h{qYX)(Pg=A>OPvVq&{MU}DL_CL#u?9Y8jkf|5&c z6L$E*J*Wm@uV%NR+S1~}S{13x8>=AP0M;#qW>;WP13O2zx$cmd-cGyky9jgR#G!%$ z6xyL;(AEDku$cn@(HCh(1kcy~Q=pclm%z#x(6CM`VF^`)Re-0H=A(;zoyiRV*o;{m zHWN(v0$@#8xnS2vw}CJ})CiX3y%31-_$cimbs6l921DMhP(1RUOji~BIEVb1giAIs zAC^}E*lLMW!ebB1;f#tafEyuGy3z^&U;SXw8GKNbsX2epiZV4tnVO@*guEi{&~D~=Km@3q$bQGK9=iL;7K1$?M*x4TJqEnBQC48+hVAv7R%p-RI8bto zViy2)Boju=8w89M8suR`9*lr@7`y<^RN=PIdCvoVTO)T{LG*N4qNi~_31?w_HLHje zWoo`=WopJX%_1eMNXa@gC9A!@vbnOj*ll%Iy9?d!;>yCp(z3m@TGP40Dht49$JODH z62h5^C{#3PLa5{#Hv~Ytqfv4M*fTt;>P?!|XdV-Z_l7yh3f)MXL8;d&Xa}Y!aoRA? z=cajL!{f6|q8}7$l;e?1C&W zi+zNSv28zpy!{E!G<4TPc2A6qo{w(NO-vg8Dw0?xq+etv1>Kc2e)O9Ig5(-Gc-elj zd(N7YD)KmBMA1A$%1{SLVs*SbFf@@^f(f&Q#mVFzrK|x?SWSOvbdjnFhU~$S;<9UG zzM`-0dAbNPU)6DWvN$rIBgr-H>pTzWFGQUgZUE*2bdSX3L|2P4HR*0F%G7*a6D2R4 z(xL-#yIo(CKO_#29LM0YalLP}no3aKg&c zK$u_)tN6NF#YF-CqHGN!4bewuo_Bx z;G;4KU!rc=vab{Ofx$Xt0PtuT*5K8^Q%p0-^p6>X9I< z#0>oV&?3u<-ZI9gNX_Rzb>n|2FvlbW^*nIXmG3>d`v|r;&W*_t)$FGl(6^0c!_1^+ zGsm3M)U-g6q)avW;Ory`Vn=ZLl8%Wbg8~<|Zqyl8`|-e&5-Vs0{@3V{$ObiW8vK@( zfG>+B8ZSOHu4xvS;{tPBV2+7#E-=SDtP0FA`QJ4GydpqIIo>@RMEnU6ymlu8;5kda z3YaU1l2WM14cedHV7U-%e?M+);h@pSZ35TMfvlBKF)spVeK>;YV4S*yJ%Rxgianx5 z%#2aaTp_Vn<*k@6suFWlnMn6@iULWZxtS=FL}Ih(JUqC7Tc9>NO24FaJ_^infjKTP z#|7s2>u42U0&_fX-GSKl9Qtt1Cps+MzXYoA!kE2_A>v9beH#oLtYa4V^_x%d%VYVu zii4sNbfLk9K0w5o-)3*&=m5&N0nzUv@$Wg@MxRAanC~ghp)kwlxZ}`I=7@p|iwR%L z(PA!vV@Yb66!mrrxOM^8o_@IYt8aZ#x>`8x`H=Ege?aebY0D~gN4_}JA?`|qP)vQx z-({;wc!essJqM+%Sv!iYPxbZE*3|dn53MgsQvZs*`55e7iQu35rvEOiB}89#jLB7c z`En}S{Fm9(3Mnsjhe;6ptN-`e@BSk`dG+qflC#)x7B(y0m361GveH_pY%VV^RyJE3 z>l>@9Yn$r}o#Va##A|_OZa>3oqsvtlkWrLDXkQu)yILIQ)Y9Td3K$z_hEV*>yN!QuMFD;TJgk0*6n# z6F52-Xe-d&p{h8%nd1bd7gc*onrVrMs=K-hOryZz7u??jcnJPZaDWB(H`55M_bu%+ zIJ@>_fy3WwrsR|Y05HXzPoc!*nNOwz3k;47(f@SUuVnx5`=*NB_v3gNZPjXL(ERf$ zzzo|Rdl)<5pcgiXffM(Gj>_ap+>WQz0ANdC*PzTO82EjHQo%*3DWN+Qxx*k5qsKym zSO}sTDM*?oV|ezF6Gk*nZKNh6AH%~x;O7kY0a%9UMrriWaX#m505a*X!17Sw@CzJ% zfy2)!BIA^g1Ep50yBECu&6Snuu39Hgv6%GIE$ZD6<8Q-;PFyfgISG zRQ3d3L!v{4St9UFsr&%_P(MmCfRQl=NEEV@b28XCxBOY&?K8U zJynu}h}A`bC;)SDY!^8E0*7DV@C)v5r)oVxjvdpg3LHN4WSwk?Sdvu6LY82Xqyg7q z=Me5ggnL*o%mG}?X`=`X#4sHsQdx`g6=k$eFzHJobpl2vMFVOC;`bwDIEuWE=B*Yg z(je9`u#b`oT^SS5wpwnCS3huK-cAXQw;<60pfMN@!9LmdBcv>{z0~-1#zLM48Z~HO zOW;jTr>QMS=?*d>PJve6K&AH%685&k-iY~-5#tG`;;rRv!soNpSZ}G&gM0>#3!h~7 zB$eH=p6$;5s)1CBy!@pb#!xZAR&cZ6rzMNyy0;$pw z%|m`&-m$3a`;+zup=$Sd2Dz$`2hXICG-S5z)e z*!k?DaVSLt2RPBndJxQ#Q&KXijg^5@C`>t9_pa?0w>%jOkjih?yFFy6T@Ul`XaZSXwXV z#Vw+jMfB2Ab=+r>GCuOV_Q3UAJH+A2FhCK#{Dwy_&x27Z*$(`cf=4Ln0=Hcj>u~aC zo;6RwoeSWXj*JNMnx!t0+)7dv6r9H2a&V*%7ujxuCna>9_=qPm|&;4o!#@MHlHW3N@QIzT@_ii~$5_gAh-Tg5iP{kbt?dK5MEury>n4`SeO}Qxu`e;7N*)y zh(;hnfnU%72}^iKc>T$ZiTweBZZ%(zpstGJ9x5xVBg_0Typf+WP3nkv`v@qftXSh? 
zZr!?tPz)YaPPblFt%PR_&#R0)2ckRVx!h8Wkp=3cy}z|(rG$Q8Yt+ekEYZqdB_vC!O1r*x5N3?d!w|NR;QvEQaSvue zqOhgm>y1b;>jgcxJy8+QTF5u7?I;=zXu1fNCe@^2iKz0OeZX32e*pC)b_1+PqscjL zuOCNJBkP9P49a-zxEd$x0jUp`pw}kd02f zZUoyOz5v`Q+_I9%0Y|{VjRX)36AG-6p>6FuNA#>K^^;@A_Yf3JPsLU;3~aHo%dFN>;zu}eZS@jucY?OoMYN#;k=k{ph7|RKtB@NP_ z8cX(`Lq`p)e^t@|_&&NzI0S0%%C4eqZbwnjb}=^{waVD(bb%{79FC)t?^k{ad0Jv! zJ;Bh0&aXz;XiS@X?lfO*t9C8-3#o|Yyoo`L-V&=u#H>RGOv?8_f zVwiK2(xV>>x-NSN5?00ItUAr7?=8aPIz(4Q=|YAfO$Vke6Vn+GT6(t5C@PAbbQj; zsw2IG0xLN)9+t)7(afA4X<>{W4&70!iV$!OQj!gnv_|mKGN_ zx|>Uvb%%_DYscLodi$^&g&v%XU0hQ=I8jG{P&nbN|8rTdKPSiP9;@h00M z)SHrS14qykBG^;xfWAgnuyE)riY7Ttu^$+nLON3mYFyXFsmk3k79`-7Q6T~o5TQcG zgE{YGR;MIxM?FVUyDDe9Gz}jvuCdbO@>11$h|--n*9C8me&T;D4urs1_Esvm1N()B zWniHb+Yj{)PJL8ALZ=+)hR)C^f_%%(o8n-FMmvC($@W1LL5_e?HqlL_gMotKfG>ll z9)uXKB&J4)2EFx}u|s*IOw=XmmcZyO__(2C_zu&CF_5^i7a((a7~+-4HD+%jV7VYz zh`N`#(_^8e&z!t`T;EE4`#9~PZ%QsirQ-Y0cOz@j!jHKE3f7W3?Tr}A*7d=~Du8Ht z5+kXtE5>+Bal~tjAe&cE&!!dZ&MJ32G7QoL1N@kAi|@`p&p(VktS{>}NyqDJEA;&{ zo`n;xUag68lCICjBIjV62_Vo_To3}Api?;-1XQd#bLe6e!AJ`U)M+6_+d*!&%$4A3 zApW>qs3~)u3Q9-jUxh?g7Zza0I72)^Eq3HPaM2@lYd_PeS74%%GXF z)%Kv?=19Fn&w*K4MQS{ZF<2eF9)Eobv48skesMw+XXWd$;>^n|6kc^bn|4_;awQ+ep!XJ1js3L4Z#UaEphaI>2Gn zLlZ_e5v$5lK2r8wNN*U6>2rqfbD?Dum7EtgS++&5z-m{9B^E7s10cN*$TCnxq#O1R z!aQ(sF0`BSmC`))N&s*X1_*ef%$~4R0Pms($Ouw)3CO%MIlz#D(L7w8a* zNlbk^e@{lcnS(k_eWTxP2^kC$pz~W=sVxdl7fpRHexejTsMu5oCz;}DFk$LDQ>)Ib zV97r)`}bsw0du9hO67U#JM5I7PL4WE#f|or@u3|B)~K7t(G%;+*z&N zx$GU6B0ZP$Q@_C8r4yZQWj6a1$MO}6+^ef}@*jR<$|{|F4AMGbIkno+0S*?r4Qa*F z2Mf{H29FTFkAKRnZC60TgE*>jP)o(Jz2IO^2GDl#t<20bhIUxZ@PKbhf z;~%lAp1j$Q>lZ2daYJhEF#>gf?2rzelCr7CB`J3xF$maTurl*F@NV0bP8V`C&23rwQU%n>R-rIz=1Za(K17H z9l-+pZ`j%O^@ro*M2GZ^RBgZWLFbuv7mmh0&;Y^)jl3ro;&dXR?Dzh=S*&vT$Xm=jRt66a%)GvKXEHC zD+44xz7I%=epJf;PMj{)E?=G_yZSj8LJxri1x;7R4PW48gXANJQId*+?oa zi*KHUW~7^xps5`pQG)Ri2}`XqNy54b*F9iUw^Z0fFitH(5wTq8;PQj^~gv$QdB+A{jr- z2U%EHq+G&x_-zwFLd0?#;uDJMNEQj<@rs3-xdGRg?Zrp+#p=Qwu0yk8tl|M4$x_nt ziO4slqVQwLwmYIPmem*i06~}fP5E4YuF^G+tyOsFr{6=b=JPJV2fnNaHK(tAm??QN zBsiiz`{4_x4W2$ZZm@7tC|oLjxoBjyXIdcFHPrGZ{?+f{=G=pKpf6H3^W1XOOK^z> z#3=+J?~n{tRbiIgFVBy^0TGC^N(J87lX852lg2tuFUR*~M@r6U=d0?3GkNBeBY9@# z#&{=)^@k#Yz=A`yeyH#TK`+NuKb-hTx+Mb|vu`+uPg{6!ldTuuSG*pWWzkeqz%Ocvyq7meo&n~QAUOK*q^ zF0rC?^x5bmXY?2U&u!qdF)cx9McyJ(+3X~R z7ErS=uu0X*JUWeyu`PpfnEF2bVFtD+X`OJiDp1GNH@ymcLMtRF1A_dKu>NuuP9SpV zJH~uWeSdx|s2J=Dz^{;cnXWP9Aqx^i@vcsNKUQ_xGWR-RV=DPcr4!v(KoL%fh7RdD z+?x7s{lscc@NYI}kXDprBwU+qU&WrGcus>VTUPT#Ky{Wfo8hFXpPWCLoomi4S2K>; ze8g+&J1~2~{fh}1k8?F=R;Ioq?`0?9^x)RCt1-Wy9ey3sYR;@q{qQejhyMaTw&|C< zmCQac8WwPs&6$}U(w+z@1W5}NfOqOA=z~Oc1QgjeyB>fo6hI*N2C>8%yJ@~Or^*#0 zHk=>7g|f+r2LTE_A$4Io;rf$YTiNK5aZ=5m*3-8EKcCbH z124Zw`o*|=PJ(PO3xE!{mcd>-^7kX)&2NAB(fx;%d6TaPBph`d$X0o|QqSY%`>+fe zhr2$o*sOQ5CG1BJZ*R*o(aVGUnA@Ws3#=^9TVPMz1m_JYdAN)QP6+NIBb#sA(Y_P6 zh?@tIt`L0EwTND9<3hJ>Tjkr^+jELs0CztWh!(iFGB-YO?B2++$`5ZpAiLJ(x;I`F z2yphHm+9Uk{_eaW?G9VdYyj4-A;pu-0I>KlpO+SvmdGo+9oh#jrsWCn!|s8NoVsnl zb7cMNz(0hj+jr-!yN-`j_ZOsAf{ltxe-`w8Jn-XS-g10UJUJvMmeFE!L*g{vopeWr!5)yDjn9z*VTxJ|C_kTc z7m%|SW^rGUq*}V)2ov+IJACVjw;<2WWg#89iDK&$C${xlIYM&ZlT2%+jb_|deJ3`xA zVLog zgZ>Y5ELFI*g#S`bOv#*%+(v^uXef6y`N~r;I z`g`Zz{+oY5R6uFVDj7?Nb=DZ>EM-jfue3$)qvG3GfR$}ZgQaNg7bVd|uV~EvgyVIZ z23&gi@~dc8HhTpt_pk8Z-)9%!KfHRky|}!zxwh7+I4douva;TBDx0f|OOJLuq>2Zn;?f*mnAN&8LH^E=`|2c8D_5Zs6 zSD9VZ@2B5hs(B<0=af`W-UW z$Pcr(6a!8H;~ru3P&$HvB}6emd51cH2upM#g3Hh5S^1_B%3M>w;IHrWGkn`$sT=Nw zb$*oct8jE>!QaY7`)c#GqoY^U;L`a~*(++xzN zDCA4coYX{(L~dSypz((F7*vdgKC$#LDF7-AjS9;~f<*xUFr3WiZXtXClE$cS4;>IK z-mvZn31j*g{ky?f2#ORXFnOQn_+{LyXd5F^Jeu2uSnxv6$lRZ~U4cB4t}yh^P>gAW 
zej{sM>h^0?=-WWpyv~do1kQCMc)X-btUsfHwjZMV>#j0r!Chm%I=aPaLmXH2_Ce zWL3q(P{z<1ur^@ipwJtz87R<+BouH4fIU*+D|xshpV-Wjp3XHb6YFD(B!4B_VAY*O ze);I6)bbKAOtaZEwT>rbCnUV9r)=ll*&J)s?yT&mM*#nrp$-9KS-m*lYL;`#ppRAB zN}6e4wivb{F`>fKnjqaX2;W^Fos*eNaK3Vxo|7|Lrf@c=y`Loz+ulejv2mP%cXD1> zvYR#fmJ$m{DO8a|P=G>=mt{ijMka7ng?cvoNVyNpp^ojtvWh$S(Zx?otMAQQs|)yR zojB*TRy;?OQ4}WBMq4=yJn)?YXB*<_MrO=D@~~NKELU1AM?goR zv}Cdr6z(W?QIHe2p}LQ`(-1uuCiOMpXnmb?C9}?_lZ*5^n2rxO@|3;OMpovYrTb>~ISTXIep&H^a@$kK$(;FD+SJ<2(2J|88gUu_MDFKK zy}Yeh`fLDs$>9a4 z+;hsa+j>*|LTQ2bO{;O$m9BvwvyCsLcyf7`jnmCO(jVz|Uu_#v9#c=UZe1^QJ7p;W zAY&Z$gTwONRkra23UO_9ZF6yBZL`u{Usg(hHwumpx4`D9VDx26nqnz(=H$12>{rxRBWJHuBfS7K)0X{GeOOS;3%*_f{8eirj1{sJ9b4azL^3U)cDC<+g?Pr zF5*4_6+#tYI==xYvs;cwCLD$Id2vN;{-KQMk!8PNn1F~Au)o5S%!XZ2ld4wBiBW#r zI&e`U+x0*o1ghXiVB!H~8=zJQ77>tTuvAyn$_*l>utGhQGe%KrBr$BTLvg^8hO;ufFy|{e zKh<3F{I;$N8NW{A{s?$J5LhW1!C9-B32>+QIs@MeB`kkMLt8iK05gysx7!8hGcb2* zNEKdiU>_wYMMa`03|?2%=o`9imXJ@-A|OvzZj>{$hhI)&#FqwE|9*MOy!{|ABmY<| z_|cvf1%TppG+UP8CpagAkRPD8fy$oL24lOvDevLxSI&F5w7KLs8Dc=L11Or2Ata>R zXWrT(6nU%pZ{uWb@$<+5@nRe2JwZeKt!Y5n0mMF)l^}q!;sSbokN9W*XxDFpJ~0?* zB(%QGI>g1^p?w6jwgzAu(bjB6IAz*SvpH`yo4me3Aa=yxsSx4&A(W6Ngt>Zy0E*F+ zUB885cVxxGsgyQb8gkLzfX5FF>vW`+J7n7l$xAG9cDx!2;Dy$xw&lKy;v*0Js9K-m z*kn)8WY0sM1kOr0&zmJ=K7>wh7?)IU;FAOJH`w*j2p*(nNJJ6n<7A+&5C=U6x1(4F z595n=Onf&M+&v?&v+H|7un+rZAGqE?(BsuARsm>;TEMICZ*5ryO0H-UyC}(%8Bsll zIQvX0A5-7Q<g~rmB?$$4KO^hLwebJ)9oT`(Aky+FeI!+OAO{ReuX=~l=$MG=Q zs@3`^Km*0A3v&lKP&McTQO%4_8v`}5BIeu*ju2rUThmL5LC~D5<;Z7uZ$Niua z-7t4mvoE^UY@k&zkst#pXH+lmDw%J!J+zwEv&x77iL8su`hl;k=K;-a>a-CuW1Q{? znKbwU%L@Fe)X>3U2YV73T3j>H2>c!3H`TIENLQI;L1`(Ip{%zpil;3t`k7jz7V!w| zv!sM1&8&K!zK}$Dk~4C@o0z65(Ff8U=akBR2ba?TCSwc-0PWX9N=GY#ScBkKGZU*} zkZ6FBRMlPo2|Nh!Cpo*bNdM`=kyL^D#kPx7T2^@{$==fAgW>Xc?8$gQ*2la7L$m9T z)L4!+3LoO>!H_+)J)9yG`$+&%jJg3n1c2Ov1OXekaX6s!$P#0Cx4O`Xs?U+tA zAwo#P4r*-4uhIiUQ>x>1Z7?odo9Hp(H0V8STha@ld+j!o z;$_MM9*J&fHA9CB1+)S(u~E9>FG_4%LI{Jc9p^n+SzlaT%BVfqkPQzMyU;Fthstw9 zd_Zy>*3bB}>)+#i?_}nCNv+Wb!Q<^+|BPGf6a=z0dLxkjt7QR-Ve9}dKf8qjKUR~E zNxN-_FWiG_5cX$y5lF5RM>eKDItzcL>H} zJ`frU5}F`Wdwuym9QoWWyS_LSf> z39e3%RwG`!0lKo33ZT0l@pgy=F}=w(vZr)$TzDP4wk;t-iP`m zITdJfqJ*V^h9(YIjE{^wnkVy-EC|Q19=iLGYEU-i2;gtE$C$H5S#_dedp!rW@8)nE zsFqOdcH9U?cf{GmGoeB1iIy6LFI)@^zRDb}li%||-`2?8h8M{Y+&(h0g_A2HLuR!M zT`m|711?aEdn(#Q@ezF_vlQmAdbsO&3{e8L<#<4zw~>$#mw0{zNhzBfI^n2=osFr$ z3WQWau^e`jQoj$UhM({bq{~e;t_Q+=o{m_3Pq|!m&NfB}`qh92sJdiC256WV z$j$AJJ$Ra)$3u8DN|&Mt9VbIvT3uYu`d^xqCnE--)ar)uA0665nNF@9D$kQOFWeMW zhfX)Ha_Aq(vTiEJY_o||z0rYwQ)dzaoDh25HhNwF^(%y#N~O?kiNYgGdgs%-pX#gb zs_#(q72X1@_Kf7xS5SRQ9@ULHLwOE9y*HxUNt zL@%f{UR2XG9Iz|6CtE^cT+>`wdrt%>IwGr5>Gg>qHZzko4~=#Ofzol&(HKLjPjHAh z*=Snd;ePAD9|9LyM{7p(6irfnuxh#2#NihB#>A0*LrW}lJm-K4n~1Gpz}$b@^Qx>N zce&cVhw9uD1rKgFv0{?WWcD`p6M77sr{;_PzFF2g;L0~3Y$!a z^fx0(bl3S1wUv#vrM1rDW(Vs0Mt5_my}q)!vbfl7 zbym9z-R|Pb!UB?TmR4)((GhI+jGcn?TqT4vYLC%HMSD>d4~<$vIm|U9+6zopQCg&n zseK9~PkAOe%UbX1Bi4_3Ql2%Q7pEvXn7vvI2^H{=h03DNu`U&k6 zOF}h}>XjTO&@HTw9(@*BWjUBhM2EKh{PFfD|9^X5*W^Zap0~2BiZ!tnVjWVIv_Vpfir{oiuE9HHr{ONW2YfX%Q@db^}dn@S3*?)1lDK#oY#9)_8+PBsbTt zU6-k9wy5IDEer}?R~8Z8n;k+b5PQyJ9mX#l5)(q<48;oSARec05@2%SsKS6TO3l&I zz(~GkA#RiZLqq5F`-n=)EN&D&;@W zXKWB()ilcVQjHNvgzKpx>fOWH&X=!Lbe?IyHd0U%#yHc}d~R0E)}YVg2`CzJy3xVF zh!Aa3M^-EcKMAp&wFl@Xnc3Q!e0MO>B8vCOK2sbpJ9+JI(0nP=6nisIA6OLZIIe_hOMheBK=grl$As44sWLfuem=&Q zDNR1c{Qp7F<+<7tEb)^^9QMM0)@EZ(dK1qYR>`(Y2 zaQEOX)AG<@Zo!`rgIr=zn%WFM=c*JVC?k*eNjE4%?#*4vTy|ugCIz>-DDMOvl(cne9)-F2RPD^$8StOs9BRuM{>h z-PUvuGdrEx*R~M&I#_k$G{g1~Td_S(F 
z+)~kFf~lkF6h-dG@+a1h)&1Lf#brm?jKh0xuUG7(`kYkX*Gj4n)bY=sKCgZ?>n`MTy*eEg^7A?*-k2Vs!(2~4C@G=ogkwCAyPSHu$PEC}6tM30UK zRT*T-cEf=g-N=`0X|*`S?X)PIL);RZPDmHxPgD3| z3B)s&$kzjNaqAGbL@L|b zqbZnlxd`PlVCorQ=`7OKA#MeVY+v*W#;=s!N zS@drI$|XR3_F)<$L{eh}!?)i^`jZ zvYzwg@23#SsvLL~@?H{)hW)2P7O~2~f?BkV;vTRnp^^stZHY`-wka<}Lun#we`=lU6ZB#88Ec+^y!z;pjoZx8L`WqR?xcE-rQc@-`Q%dZu^~UtF67Q?Nxts zcV~06+uHHl*C2NJ?9ZmV{5hpBpM6WZ`T4US8ipl%MwH(6`PWhb@t=_fEwHt>+=p-)UlH#sqQ$DgJ38GT8^PB$`$49~aU$zE zl81F2C-Q;kIFYB4(B_|Se=gdnZ#Yim0@HrJwzV$RaUwfTWXFk2nkC1H>^PAT-gKPE zlteg(=GJi{J5J<+l!q|^&zUUZOqeu5i7Pu#63ACTC+#?qRfNlNA{QB$Eg|qMQoKY9 z>^PByGQ6ZFAc;1|iJaAfnw@G!W5<@c{2nObR|*p;4$_o6}_@h zO(s$asY43fNjdlQ&1G-*p;;G)L3&30xO}58@9{V&%X)5B$gQuk;rOV5>PA`-LT*=8lTC<^RoIQU zxZ+)(goBQ4n|-_=ooSD#LN&ih%_fyQ^y>LttI(a*`y5l>A?W&(0W|E4Af6Nh<{>dY zR&#X_OGc?6-UL_$77P|z#Wq0_?%@PFlp?B%5f?e*g7i&LVl}LI8!va}T6^43X9}|ekvIvjirFAaNigDo$ z%cKE%FO3G_{mN5tgd7gKM1+U+ZOkj#n1=*o+ zXsD`{X5~VVRaCNNx@hlWX6SkXG!d$es`n;Rje|2hes*EzLT^{RyD?>c`BtiABI8HN z$MTTL9)K@Rk_mLTi`yE9{pjcj@+i1bHD7u)oeB5XAJjOK7%b1bjTIOE9EvdSSvB=4 zFMD29HQrVZJQe?{hEby%_~|4LlKS@g{_a((s83&Ng?>`Mi5VTp-EkWgLwoJpsJ`55 z-=?q(U;ADVRBr^sRv@CU32yiCD20gk-BDtM6F$g3ez&r#jGHyC1Y4jYqVoP0cfi1K`~B!uDK4yyzic@!m_4$&jv#=!@r;G&~ViQSAGu zDTfgbC3x{0AhByLxgaFRtSHQ15uZVw*G>r0_!N;24sSKj1DPbT0HGNbT=a3*?6Z_ty;4Hx_tCzvP z<8#n&*`4hxs8TmNm{Id~N}0#ZQ=~g2orTT}$3X&dNbO^28{0U-Y3S=ijNb`D(-76T zh(lj>ZqAP7;PUr%i)FLmec+J^7rvVw5wL?4!yO;5Of`NE0v1=^+mDOPxT$l7|Vv| zd0$Mo>1idOw^LuOZmh%KM3!v&H{M#QdsYr=o86~BW*-*j)V$b^X{Z^D-D}&MyX*V= zo9nYJ-QL*W-df+zA6;x4TD5!8<7W0xTXBkMo$d7#*#f!t3!9e5D1IkM9c%sE*n9aR zx0JV$wf<$pAv@TTZ;aYl-ci~JXUo0K@^R?vuM7G*Ge=NhI}@z`g(L@lNg?ZpFmYg- zCTeE+UQ7`j>~woLg6@qFCCsq{<~y>L^!-x?5GTm~$Z3jTbK8$Rlryzp#&tuSs)O0* za{JU2foHM4WmUs|8 z3Shkye@feZ=s$t--Eb72VWPK?s=^kCpni}4kDSb-AQ)RikWV>0lmII%+99k=J{y*Z z-$sr|jZM5}91Q$4Jmte+sgGiGSC&(YCsYncYx2H>RCpollD(D?oqLFJ!^Vh$7=4D0 z#1DI;Q3s=gM}{!v@HR=?7x}_*-U`Z$5 z#_vRf!Bf1j@d(O!nv-P><)18HJz3Upy!@#lb@{Up7C%{jP`(-8u)eja7KrpyeRrF& zf9A7r!qsbQVw~iC+}Pk4ya$I1gB6DfEGkWlc-3S$qG8op!R?X+PFj45K4=B$;~+3& z#opoe$LT^#ne)^e(IMgZYf5BieI0&GFvcBpvM2nxap2{>=nJCV?5Ju;qiEK54jH*=$I8UM>1>m)1O?gc~(JzG=wEL<)A%) zd3(_OZ`swF*g3G*YeRUAVH;jY4^%_F+NXH$>mTADLEKbB3p36>%uB*yIPmG0Wj%M* z;~IkNsJ^Amcp@VOqjKP(VbYbb(I;d4<@xP4whE?LX+)-2G&S!g)-V<%b7(Y0;5?%j z`08r3@E*l%GcA6M2L`92*)!x(J1zSP%?cJIJu*8C;!!oy30@vMQA5{09fnRY>UJAB zm&q|MiJ#c*njJ!#u8snndRW3kU;HWNaoO8}JTiFV561(N zY|GxJHX0^v`k2Y%Q>^@?hD>+r^{~%G`fT{0NE+Ifb}_5PQqJsn58H>{)O7H0gj_Y23f?pXSm0-OKfzYm58*f^ld)vso2@Rq#}wObCW>}xv*RUD z&W=pkktr{ObRT?@cj>O*fcX$fDzhpsXM=RS^<{>Bq8{)!CH`jalS0TUhh#?PTto6- ztPud(m^;S7I`E_31bZ{~5mHm)Sj7f}pwQz{+(~MW4M_O8kl{GJX zBl{GiHw$X!!78E-u;?(%M2<n%`pZPWZ;X3B&}@gULruTpaudPI>Wo^&-IJN2J+ke5Se8Osx7F+ zf8rHF3AH!fh`)X*hoj%mMO(htU+JHxK{Xx?;Fu!H79#Ey zrSKuKg^TRLTUbz>dodl82a>b#_qUeVmhep168o)<*8WNKz7FMqXRSBsE(8+DtbWxN z6%?YYa~ap03|l$@h$cWaICY%>goe0ZPy@APK`#Ryaa)bdfoBMr;qmN(t!OHIr&?J{ zhvT)|h^Vj15xKgxx_5n*fsA<4PS+3&pQ|lU5jY351rpq}jeJ_eOy&%&UwCdkYk^4` zvsJ1nP_H~h)}>Oo&ig>bbb#WG3jbakp2@vy(_c(b3zgO@lALV1fuk6hfnpkAyPV5Z$X?*P$|i% zng1&e9y`ec$Rq6|H}pz_WG8?bnu{7_zLwOA4c2PHC2r^y4St zX>Anu*8Enowz;vsTSJlH-p*080ThX%>IiyV4Z1|7AW|R>tfog%%iZorAreS9vS^?< z3YD7Kj2(rF9N3OR#k=i4LOzZd6fQymesxUN34vC|*%XkWF`3nC*UK35L;K~1*hadG z906wniUTJA5eGxx2|#cbA>8N$AT(tqum?fv|2_&p?6un6q9Ni0Aiib-5Yv{XD|T?D z_^uQmrqsx0iKr|Gq^b2(09ET6Pm1Q1I8y|P3TCq^{t?VFW)Oe*@q4$WP6-@Jz|E(? 
z%Kz;8%G`3iF9RMvC$N)SlYCt%ki{RgBAoz)6M%rvMK`?KLXsR)LSfqs$RdCZI~mYB ztOU3r;6?J|&SQiR#a}(fcToy$aMc7g^DYuT5Lo}zYSIN{QGKlGMVt^w@iQcAq@Wex z3j*AM%O3h?S#4|qz9IBVcyyUWO_-Jllpj6vP?M6de8yBDm?~p|zhwcH4)TU^h$jV| zrinUR_q$X%n@6a>)}{EP_3J?YK&LA&nqSNt}PpS-)l8>qr8EQR6(YX zrBIx>hKp{Iagc2_vimff_+_JmkEE`**}Py^Wz(eOraa~f#cvoW{_3hG8rHr-5P!e~_NlH)`uE_#T>-Vt*atmRuWhg)fIQh@f0v#=qW|e^m8@{s`4>^+UNX zf7e(Ob>J<#(x|USY1CJ?BI@C9|LC1}asXuIZ~M!4Car$;PJiW2e)&bX$cqcWw&&t8 zpB*g^X@_*AT)iI*#*$$1qvSD)j**j0v@l=Oo;iGvrw+=Lqc@ErJ*U-&;+d5&gwk$s z37jHn3$`Stb>LHpo2GTs_GNzhrrZ3+kgs=F9P-s6Uq88Zjqy7lEp(dh9Y z==1_`Vfk=K@hMI@!2sVs2Jsk9IsW4o&Q^Qc;wtYUEb`MoS0g}qHFki+n?#Qim`K+@ z_}Q&H3?l@MqA7H% zf)yR|)xho)2OEMI(tkAp69lpj|^cR z$!myXLV07SL&PAYHO3&6f-?+;C$O^+^`xO=FyV}`@hc`7JWaORI0MAPZNG;aL?zz@ zV9J)tF*PUfs4X1)$W>%N&>V#kl)$M%NHbh~l4|nLlW3AiLS(8@xQM%t#(EajypM2G zvgvdJ@aUMbQSUaRG5&z*jX7LE`sErpN%^H(4ppz2-yM&Tl}3~+ZUP(0pt&!V0a?d* zc|o{Wz4!sD|&O%8^!*F=5?i5>7RNFh#= zW4$j+N@K+Xr!{+~@dyt2$|-MaK(joH3>K(`B(Z@6^-KvSQ`T4kmWa|&71Lw|#R2kR zT1~1GT2yc_wkU)pGq6w6Vm<)rI^``A?-4M=sni`x0kQ_=)FBTi!!dttfQW_EITw-^ zsw`%z^kc*bNDogkLz)8)`O2f8#!FtVkvQb*f#;B~4*5Dw z3~H?_Y$%TAJ1U|i&oT8bMb}Bw?`9}*y^Kl|JhNourC(wRTf`T$@GYEox@j4fb&E%$ z!_}0A8N)NHRqKI5bkagS>TnHuAN>AWW|6M72BX&6_TKL1Zf9e^gF?2w?*3-`+V=kT z#zwc*+3Bu#yBpi<>ziBt=FXblI#Fg>+oIa|3}tVyV+$Zl8s0SLm9ROCfeEz*Ii`BY z81hzbgUGquKZTR0ynO6kHLvx=BG)Tx$`K7s&MEOhL;lJ*68KFdU?}`ixS>Vr!Jshw zDCpGUm^$Puv|MD?WkXT}H!vzB>7Idxx&wAfH8cWDE(8HEV2ESLXsKjMu0#{Wh=^HP z8dH>iANBY$iyMWHSf&0Tp9F=E>iBxrlEQW157NHj4hG*6LjzWC26snvoDJgGn;^_J z#ktjpY=q7w_yKC5Lc?g2-`5cQDq0}t@DZZ{;MyqL^tNSfiE;U?#wL zoY~qMyqf_J8rY=@Yr^rj3ZnFY?Bun-LGvNRGLboHgGmU?w1Iz!8)VgP8e&MkEO>XT zzM@!j2@U~lXRFaa1@E!n0t2wVj6=SDUC8$@8K08C$>ig z{3+dP!iH}Igk2Wk9uCG9Zp!mXn-4{0p5WBaI(V@p_yav=H1rAn!vj&JsUEF!aUsbR8B}4(Pdha9hH~_-*GIV>DTdxPDlYRCTA#D{(8v-RuotjZp zKfH&3d@O(0kgIBpy0Bp52vF9-zm|^-hzP1KgQR}{0Wh+kBtcvz7Q_mS3wWaC<3Lhm zRuUc`i?3wA@+4KRdIuVIpkW6ZE`fK?qcnYG=UMi~eRaRJ*IM1%-)!v! z`v^yFuRrhq!?Qn|9_r`)e|-A<*|(&dpFjKI{9e|#eV*ii`oHi0L;oKQ*8VsBf671j zP5;*g2>xZci7#HrZwwj!&&LiKekmi(aUwfpc#2p?>Nt^w#?~Rj9WvZ;B0EmxMDVPT z3k7Ah<3vtqp?4>Oghp}>@{~~l6{Mj_i*PoTH#WD(?Zgv;YGwW~IXb!0qzsH^6rl#l zsFFj5J7l=6!*j@R10j*6t|z(D)o+m7zJNP*si-8H&4S zK`e}I4jJx{;Xqhk2bfR`h`6t0ng9SwVU|F1oV*qY(H6cVMPUvZelSgi=W^g*)f~8? 
zUf4zv^Nkm%Z#Yh5$BAqpsty?*vntvl!wHUl5m?_L!^LY;H^m{t0atgN$izvOxNimh zIFQ=)#TU2GOhWZz1+(0`Ag$*g| zaDa1k$Z*Gr47lJh{G2Y3lsX+|_-)R^BPgY!whv5t&@=};u$GmfIAnMO0BC_+mk`-A ztG9l?qHDKuwem<7F8hx8yzjEDmxzoFkH2skOQ7fY zqZ=O?SiIvzz6>FuLxvli_!~urzZ|IkEyQcR2wnL5D`ao8m(z5fj@JqWzfRHBo>z|7 ziq4?pwQ|`@l<5Om3~mj@K&f ztN-q@mlj54=?pF+*wlVQrnkr;nOsEFBhfUFoaE#k8C8#=`H*Vq>6=TM*#~n+90utb zNz`-hx)LCe93W*qH!I}U*U}+;TRCsa?Jf~ZYF1m-=h8k2-WcPtOQ=*M4ho?DDB@dr z-V124H$AYnE7yejKa{gk^%AIW?Dj%_8Mbj3sX^Wg-XLuA=yZXfpr z0sx+Mh+i3i2xRtz4pgFH9K%q)i%yd};6*@nAS##TjVT-cHj2#ocrf?AxB~vFaKKd1 zFCcf)$j}4(m_R6XB*=yu1#hPEUIS0)h4LqQFZ~ejSDu0+M10Y0Qns#dV_q4*N)Y0+ z>VW0ZLAZacbTdX!6>bx3I!kc7#6)pgIS5LTq z6fxBb2BXJ^Mu)MHea{FS;klfMW35mGJXWkmWNFC0KEDnnfL>{1ZDKt>qkdN zNX6hr)qLsIbSB(ie^A3o8)2|K?>1Ik_;V=2yl2nest#IJCxG!>G{>M5!sM zZ?Es~UL}|0^rcqlC-s|{(Sh9E^QsyReeK(5EcLZ-Q&@(teJ>bujbPXcg!(GM?LHo* z&=|XGxJ&)P2ib?*=sZtW2ltnJgdwJ-dzd+va)6TIT`7;HtOyz7R~^fY(!vZgMyL~y z#^@KkzzzMYO7$(sZG{o&$f1!{98d3Mvq@VHb&~BUg{lHQ$-t(T`VMvCP$y!jI6gv@ z$lw44zS*Hp;-GubRA<5Q5y}qgl(@ayVw}Gh_a+dfNa4PE%F<@!X}-#+lZ())>T2aX z!%3u`O3~?j9K#n9{67ls-~z%o%PB@#ig$_+S%)_m^}_aCL%irAU-8~clF3jCQi!F= zEopcns-oBjjGX=lP){;P;YC_YE}*_dB0QT{#AnbWKO-p}jC}0+{a^??3tpUVdL${E zeXl~xBPh;`l&yF%MSBppLqR9xV;Y^XT{8O%S3&xOcmh7pjJi^l0KK(Ok%5LtHC=CC zNL?)hpxCCiz_DoGwr0KWwHDqaj7#y|knUZ^b2T1be*#lH6_I7{%#>SeI+ z_#E_Gc4zwvib_ET2T$lq|AeTb1>2a#%sdQHS3 zv`G!Pj}0iMo4Zr$R+P}ID^o5hdG3@E?y|QyYRcsL$m;Tdht9kuUzqdAj5-|@#O$Y-TV38oXSWeqY{ttWdkV4w!xzZ$-Yi8-MQbdKvJfotq1$*3J- zY&sguG65{ZH>i(7Ok(!=8kdl}W!}|2bX^>LHUS%O(&)v0$AnS1>|;M`I0I(ouZf>S z7G1Vj;wbdsw{YO)jJ)|^^5*iXX%t=&`%U%!xQ1^B1589l z9>(eiO5LHi))<1Wqi&^A(Z9YZw)&~QgBfE(Ed0gv zoOoKv=k3&2s~hX^H<2Zq{*AX*>YkN@+Gh9Zk7*(n<`ki8*$-vBzz(GiHG{ExZF_Tf zeSd#*eYU0B8~fW^>)ZJqifu!yb}xF|%>L=v@=Xz|Y(BDm6{@QYKYdXc>7pRKNV;=N zx%n2d<-cq=WSVw)8ZhB_xgpnX&$ZIAMc#IMu-yLY*uj}O0s{S+VEr#7Iq*vg06v6? z1Jg9o1JL&f5#>&|ht9Y+LX?npN*s#apC6J&V_`I*f&xu01e*;_e+y<@H^iw*Es3cl z0k=<05!i$X4Kms$&oI$j%oX9QBbbNyf0#gzf?#Y7K|ba1Py(#5Xos*e**;hzK4R_0 z*hKs+4j`Erp7LR^)JL(*9TS;{vYT3kdEY@QypVOtUQ39GJ;bzx!a6Bpu@&W_=SqO`tEL*&? 
zxf$QE4z6mkK$N+9cbl<)=Ck0<)N85;lJ{|AgX{D?Se+QGI0QwlO*&PR;fRJ+X9c%Q zpeQkkPtgahAblJF_sYzQ^bTz^P8V9roTuK14hhF!QzAR->+oZOai&RudIxG#Adh2* zARPL=;NZYp6Q?L_uZ6?jOheg-hBl%$j(Ti%A$vt{=oruRTX;O4+?&AEc#N5+zqT;=Qkbh<`uBt?#2S|TRIX>C z&sIWr-lKS0)8fZ?U~r6P2J)z#mVJdxE5iUfhE6c*c9rcy425G{5HTY({rde}bnriL+pI6|&k zScf3=eddW?hbAzS2@2Gag>SaH@E%iax0$TZrOl3)Ksh@SXGh}fNSxm=iSzO3%V6LA ziz3zTH~W=-<=GFWX^52|S7LVyYSNmRs;c$HEg2`L9O?&4OB*%MyJb$XV@n3Kpk^-1 zK@=kW&6d z8$_ zj!w+E5xi^-X*_~67alFb?wtko=`Wb$6F*JpHfu{ue^r#Y$~{zVa1P3qc~kl=GaHkI zZ3~BthX}5dE|IhjTf@AljZFJ;&}KoM4}BREPGOrl85lVLN-&n72``=#Ho-VuKU+|% zcaUNkz>_m=wkC+CG#N>=k~gJ8V?m95mk}uIRU$57PW}b8=E&wS>5ykhsnwv%s3YJB z(o%JQ)s>gHpO^Tj=+&L%R!v51R z;?E&1+zA>u`Gc4sh#-dS)bu4*7|WBRw0{2>&wroIh|*hWFsxEsroPNp@fmE~@)c30 zq8V}u#CiuW#+PFB3fPAba&BA)d^-f3D~HD)+`ikmdF#Uuj*y+S8$OrqbTjxXG1eJg zw5aVctt}&s`nJ|7ze-p879UjqLWTlP{j$m6kMZg}S`u&EIf*)5WSqwx^)a{Kie@ zSU!m35hnPZ5w||v!Q*o1c+2^(m(j2_)3OTYRpWkj88`%4VMLgqb_LV?4zz(W<*LiT zf1~y!>Y2nnKDuYF+(m$Pm{bw@N1V2IRzDt%@7mAqnTvMqKniXwi)V;HsX+7z;9g(f z;G)H6_}4xjhvbVYDJONHyJI-Bsgz7WOc*lc=~;cFw!WgxN--sdxaDy?(T2+6$1uRU z;$nC``F#$8=dEC02fWN780SLHVr zwRQfhnXU7U=l;(*VBQ6$R2DzheS##p*#Ql=$08;P$d%x67w=RE~(&GiS z?m#ldO`I#O7_3%cFhp84MZw6@lEoXaeaM*dI+CDF#^`Rfpcel}Eo@)9O!#y`P5fcC zQqGYS!?p`*(|3#65eXMBs3Ct+%8M+h5&x3;7!w{}P!qN', 'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}), ('tool_name', 'code_interpreter')]": { + "[[], {\"kwargs\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\"}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 
'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n return True\\n\\ndef nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(nth_prime(100))'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\n# Load data\\ndf = pd.read_csv(\"\")\\n# Rows\\nprint(\"Number of rows and columns in the data:\", df.shape)\\n# Columns\\nprint(\"Columns of the data are:\", len(df.columns))\\n# Column names\\nprint(\"Columns of the data are:\", df.columns)\\n# Column dtypes\\nprint(\"Datatype of the columns are:\", df.dtypes)\\n# Sample of data\\nprint(\"Data sample from file:\")\\nprint(df.head())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics about the dataframe\\nprint(df.describe())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No 
such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print information about the dataframe\\nprint(df.info())\\n\\n# Print summary statistics of the dataframe\\nprint(df.describe())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\ndf = pd.read_csv(\"\")\\nprint(df.head())\\nprint(df.info())\\nprint(df.describe())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'code': 'import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf 
= pd.read_csv(\"\")\\n\\n# Convert the \\'Year\\' column to datetime\\ndf[\\'Year\\'] = pd.to_datetime(df[\\'Year\\'], format=\\'%Y\\')\\n\\n# Group by \\'Year\\' and calculate the average inflation\\ndf_avg_inflation = df.groupby(\\'Year\\')[\\'Inflation\\'].mean().reset_index()\\n\\n# Plot the average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(df_avg_inflation[\\'Year\\'], df_avg_inflation[\\'Inflation\\'], marker=\\'o\\')\\nplt.title(\\'Average Yearly Inflation\\')\\nplt.xlabel(\\'Year\\')\\nplt.ylabel(\\'Inflation\\')\\nplt.grid(True)\\nplt.show()'}), ('tool_name', 'code_interpreter')]": { - "type": "value", - "value": { - "content": "error\n[stdout]\n[Errno 2] No such file or directory: 'bwrap'\n[/stdout]\n[stderr]\n[Errno 2] No such file or directory: 'bwrap'\n[/stderr]", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'query': 'How to use LoRA in Torchtune', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { - "type": "value", - "value": { - "content": [ - { - "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:cc646\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. 
code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:cc646\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", - "type": "text" - }, - { - "text": "Result 4:\nDocument_id:cc646\nContent: from our Llama2\nmodel without any wrappers or custom checkpoint conversion logic.\n\n.. code-block:: python\n\n # Assuming that base_model already has the pretrained Llama2 weights,\n # this will directly load them into your LoRA model without any conversion necessary.\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\n\n.. note::\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\n :func:`validate_missing_and_unexpected_for_lora() `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. 
code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null } } }, - "()_[('kwargs', {'session_id': '', 'query': 'How to use LoRA', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:cbc88\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. 
LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:8892b\nContent: with training with LoRA quickly,\njust specify any config with ``_lora`` in its name, e.g:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\n\n\nThere are two sets of parameters to customize LoRA to suit your needs. Firstly, the parameters which control\nwhich linear layers LoRA should be applied to in the model:\n\n* ``lora_attn_modules: List[str]`` accepts a list of strings specifying which layers of the model to apply\n LoRA to:\n\n * ``q_proj`` applies LoRA to the query projection layer.\n * ``k_proj`` applies LoRA to the key projection layer.\n * ``v_proj`` applies LoRA to the value projection layer.\n * ``output_proj`` applies LoRA to the attention output projection layer.\n\n Whilst adding more layers to be fine-tuned may improve model accuracy,\n this will come at the cost of increased memory usage and reduced training speed.\n\n* ``apply_lora_to_mlp: Bool`` applies LoRA to the MLP in each transformer layer.\n* ``apply_lora_to_output: Bool`` applies LoRA to the model's final output projection.\n This is usually a projection to vocabulary space (e.g. 
in language models), but\n other modelling tasks may have different projections - classifier models will project\n to the number of classes, for example\n\n.. note::\n\n Models which use tied embeddings (such as Gemma and Qwen2 1.5B and 0.5B) for the\n final output projection do not support ``apply_lora_to_output``.\n\nThese are all specified under the ``model`` flag or config entry, i.e:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"output_proj\"]\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.llama3.lora_llama3_8b\n apply_lora_to_mlp: True\n model.lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\",\"output_proj\"]\n\nSecondly, parameters which control the scale of the impact of LoRA on the model:\n\n* ``lora_rank: int`` affects the scale of\n", - "type": "text" - }, - { - "text": "Result 4:\nDocument_id:cbc88\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. note::\n\n Calling :func:`lora_llama_2_7b ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", - "type": "text" - }, - { - "text": "Result 5:\nDocument_id:9dcb7\nContent: ora_finetune_label>`.\nFor more on QLoRA in torchtune, see our :ref:`QLoRA Tutorial `.\n\nLet's take a look at how we can fine-tune Llama3-8B-Instruct with LoRA on a single device using torchtune. In this example, we will fine-tune\nfor one epoch on a common instruct dataset for illustrative purposes. The basic command for a single-device LoRA fine-tune is\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device\n\n.. note::\n To see a full list of recipes and their corresponding configs, simply run ``tune ls`` from the command line.\n\nWe can also add :ref:`command-line overrides ` as needed, e.g.\n\n.. 
code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n checkpointer.checkpoint_dir= \\\n tokenizer.path=/tokenizer.model \\\n checkpointer.output_dir=\n\nThis will load the Llama3-8B-Instruct checkpoint and tokenizer from ```` used in the :ref:`tune download ` command above,\nthen save a final checkpoint in the same directory following the original format. For more details on the\ncheckpoint formats supported in torchtune, see our :ref:`checkpointing deep-dive `.\n\n.. note::\n To see the full set of configurable parameters for this (and other) configs we can use :ref:`tune cp ` to copy (and modify)\n the default config. :ref:`tune cp ` can be used with recipe scripts too, in case you want to make more custom changes\n that cannot be achieved by directly modifying existing configurable parameters. For more on :ref:`tune cp ` see the section on\n :ref:`modifying configs ` in our \":ref:`finetune_llama_label`\" tutorial.\n\nOnce training is complete, the model checkpoints will be saved and their locations will be logged. For\nLoRA fine-tuning, the final checkpoint will contain the merged weights, and a copy of just the (much smaller) LoRA weights\nwill\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" - } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", - "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", - "8892b092-6394-471e-b143-a23c6cc374f8", - "cbc884b1-9d88-4d5c-aff4-7a4b3a56618c", - "9dcb747d-0627-40cc-a23c-0bee2b6b05af" - ] + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null } } }, - "()_[('kwargs', {'session_id': '', 'query': 'Llama3-8B attention type', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\nimport code_interpreter\\n\\n# Load the CSV file\\ndf = pd.read_csv(\\\"\")\\n\\n# Print the first few rows of the dataframe\\nprint(df.head())\\n\\n# Print the data types of each column\\nprint(df.dtypes)\\n\\n# Print the summary statistics of the dataframe\\nprint(df.describe())\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:num-1\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\nof models across a `range of different benchmarks `_.\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. 
In this tutorial we will focus on the 8B size model.\nThere are a few main changes between Llama2-7B and Llama3-8B models:\n\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-\n", - "type": "text" - }, - { - "text": "Result 2:\nDocument_id:num-1\nContent: instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\n\n|\n\nGetting access to Llama3-8B-Instruct\n------------------------------------\n\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\non the `official Meta page `_ to gain access to the model.\nNext, make sure you grab your Hugging Face token from `here `_.\n\n\n.. code-block:: bash\n\n tune download meta-llama/Meta-Llama-3\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:num-0\nContent: :`download Llama3 Instruct weights `\n\n\nTemplate changes from Llama2 to Llama3\n--------------------------------------\n\nThe Llama2 chat model requires a specific template when prompting the pre-trained\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\ninference on the model, you'll need to use the same template for optimal performance\non chat data. Otherwise, the model will just perform standard text completion, which\nmay or may not align with your intended use case.\n\nFrom the `official Llama2 prompt\ntemplate guide `_\nfor the Llama2 chat model, we can see that special tags are added:\n\n.. code-block:: text\n\n [INST] <>\n You are a helpful, respectful, and honest assistant.\n <>\n\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \n\nLlama3 Instruct `overhauled `\n", - "type": "text" - }, - { - "text": "Result 4:\nDocument_id:num-0\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\n\nThe tags are entirely different, and they are actually encoded differently than in\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\nLlama3 template to understand how.\n\n.. note::\n The Llama3 Base model uses a `different prompt template\n `_ than Llama3 Instruct\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\n template for optimal performance. Generally, for instruct and chat data, we recommend using\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\n Llama3 Instruct.\n\n.. _prompt_template_vs_special_tokens:\n\nTokenizing prompt templates & special tokens\n--------------------------------------------\n\nLet's say I have a sample of a single user-assistant turn accompanied with a system\nprompt:\n\n.. 
code-block:: python\n\n sample = [\n {\n \"role\": \"system\",\n \"\n", - "type": "text" - }, - { - "text": "Result 5:\nDocument_id:num-3\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. note::\n\n Calling :func:`lora_llama_2\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" - } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "num-1", - "num-1", - "num-0", - "num-0", - "num-3" - ] + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null } } }, - "()_[('kwargs', {'session_id': '', 'query': 'NBA creation date', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv(\\\"inflation.csv\\\")\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot average yearly inflation as a time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", - "type": "text" - }, - { - "text": "Result 2:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at 
Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" - } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "nba_wiki", - "perplexity_wiki", - "perplexity_wiki" - ] + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null } } }, - "()_[('kwargs', {'session_id': '', 'query': 'Perplexity company founding date', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"query\": \"How to use LoRA in Torchtune\", \"session_id\": \"\", \"vector_db_ids\": [\"vector_db_\"]}, \"tool_name\": \"knowledge_search\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 2:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:1b69d\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:1b69d\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. 
note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:1b69d\nContent: from our Llama2\nmodel without any wrappers or custom checkpoint conversion logic.\n\n.. code-block:: python\n\n # Assuming that base_model already has the pretrained Llama2 weights,\n # this will directly load them into your LoRA model without any conversion necessary.\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\n\n.. note::\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\n :func:`validate_missing_and_unexpected_for_lora() `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. 
_lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe ', 'query': 'Perplexity the company founding date', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"query\": \"Llama3-8B attention type\", \"session_id\": \"\", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 2:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:num-1\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\nof models across a `range of different benchmarks `_.\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\nThere are a few main changes between Llama2-7B and Llama3-8B models:\n\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:num-1\nContent: instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\n\n|\n\nGetting access to Llama3-8B-Instruct\n------------------------------------\n\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\non the `official Meta page `_ to gain access to the model.\nNext, make sure you grab your Hugging Face token from `here `_.\n\n\n.. 
code-block:: bash\n\n tune download meta-llama/Meta-Llama-3\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:num-0\nContent: :`download Llama3 Instruct weights `\n\n\nTemplate changes from Llama2 to Llama3\n--------------------------------------\n\nThe Llama2 chat model requires a specific template when prompting the pre-trained\nmodel. Since the chat model was pretrained with this prompt template, if you want to run\ninference on the model, you'll need to use the same template for optimal performance\non chat data. Otherwise, the model will just perform standard text completion, which\nmay or may not align with your intended use case.\n\nFrom the `official Llama2 prompt\ntemplate guide `_\nfor the Llama2 chat model, we can see that special tags are added:\n\n.. code-block:: text\n\n [INST] <>\n You are a helpful, respectful, and honest assistant.\n <>\n\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \n\nLlama3 Instruct `overhauled `\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:num-0\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\n\nThe tags are entirely different, and they are actually encoded differently than in\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\nLlama3 template to understand how.\n\n.. note::\n The Llama3 Base model uses a `different prompt template\n `_ than Llama3 Instruct\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\n template for optimal performance. Generally, for instruct and chat data, we recommend using\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\n Llama3 Instruct.\n\n.. _prompt_template_vs_special_tokens:\n\nTokenizing prompt templates & special tokens\n--------------------------------------------\n\nLet's say I have a sample of a single user-assistant turn accompanied with a system\nprompt:\n\n.. code-block:: python\n\n sample = [\n {\n \"role\": \"system\",\n \"\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:num-3\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. 
note::\n\n Calling :func:`lora_llama_2\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "num-1", + "num-1", + "num-0", + "num-0", + "num-3" + ] } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "perplexity_wiki", - "perplexity_wiki", - "nba_wiki" - ] } } }, - "()_[('kwargs', {'session_id': '', 'query': 'Torchtune documentation', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"query\": \"Perplexity company founding date\", \"session_id\": \"\", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:ab1b9\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\nlook like so:\n\n.. code-block:: python\n\n from torchtune.datasets import chat_dataset\n from torchtune.models.llama3 import llama3_tokenizer\n\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\n ds = chat_dataset(\n tokenizer=tokenizer,\n source=\"json\",\n data_files=\"data/my_data.json\",\n split=\"train\",\n conversation_column=\"dialogue\",\n conversation_style=\"sharegpt\",\n )\n\n.. code-block:: yaml\n\n # In config\n tokenizer:\n _component_: torchtune.models.llama3.llama3_tokenizer\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\n\n dataset:\n _component_: torchtune.datasets.chat_dataset\n source: json\n data_files: data/my_data.json\n split: train\n conversation_column: dialogue\n conversation_style: sharegpt\n\n.. note::\n You can pass in any keyword argument for `load_dataset `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. 
grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `.\n.. .. _glossary_fsdp2:\n\n", - "type": "text" - }, - { - "text": "Result 4:\nDocument_id:cc646\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", - "type": "text" - }, - { - "text": "Result 5:\nDocument_id:8bcf6\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. 
code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "perplexity_wiki", + "perplexity_wiki", + "nba_wiki" + ] } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "ab1b9c78-180f-48cb-bbef-c70a4a59e42d", - "cc6460bf-74ab-4d11-8d32-bc02144a4e79", - "8bcf61e4-98c4-41a7-87f9-833c1a4d2b28", - "cc6460bf-74ab-4d11-8d32-bc02144a4e79", - "8bcf61e4-98c4-41a7-87f9-833c1a4d2b28" - ] } } }, - "()_[('kwargs', {'session_id': '', 'query': 'current CEO of Meta'}), ('tool_name', 'web_search')]": { + "[[], {\"kwargs\": {\"query\": \"Torchtune documentation\", \"session_id\": \"\", \"vector_db_ids\": [\"vector_db_\"]}, \"tool_name\": \"knowledge_search\"}]": { 
"type": "value", "value": { - "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\u2018Boz\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\u2019s finance and facilities team to keep track of the company\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. 
Meta\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Meta to spend up to $65 billion this year to power AI goals, Zuckerberg ...\", \"url\": \"https://www.reuters.com/technology/meta-invest-up-65-bln-capital-expenditure-this-year-2025-01-24/\", \"content\": \"Meta Platforms plans to spend as much as $65 billion this year to expand its AI infrastructure, CEO Mark Zuckerberg said on Friday, aiming to bolster the company's position against rivals OpenAI\", \"score\": 0.73914057, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\u201cloved\\u201d an image on Facebook known as \\\"Challah Horse\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}", - "error_code": null, - "error_message": null, - "metadata": null - } - }, - "()_[('kwargs', {'session_id': '', 'query': 'using LoRA in Torchtune', 'vector_db_ids': ['vector_db_']}), ('tool_name', 'knowledge_search')]": { - "type": "value", - "value": { - "content": [ - { - "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:c4fc3\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. 
LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:c4fc3\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", - "type": "text" - }, - { - "text": "Result 4:\nDocument_id:c4fc3\nContent: from our Llama2\nmodel without any wrappers or custom checkpoint conversion logic.\n\n.. 
code-block:: python\n\n # Assuming that base_model already has the pretrained Llama2 weights,\n # this will directly load them into your LoRA model without any conversion necessary.\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\n\n.. note::\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\n :func:`validate_missing_and_unexpected_for_lora() `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. 
grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `.\n.. .. _glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:1b69d\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:deca9\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. 
You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. _glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "b222e2e6-0584-429c-bf93-db53059f56fd", + "1b69d5af-63c0-439b-af6b-db5ec865ec3e", + "deca9bab-a475-4955-8dd9-7235ebd0f2a6", + "1b69d5af-63c0-439b-af6b-db5ec865ec3e", + "deca9bab-a475-4955-8dd9-7235ebd0f2a6" + ] } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "c4fc3cb6-6172-489e-90a7-b39d343e14c0", - "c4fc3cb6-6172-489e-90a7-b39d343e14c0", - "c4fc3cb6-6172-489e-90a7-b39d343e14c0", - "c4fc3cb6-6172-489e-90a7-b39d343e14c0", - "c4fc3cb6-6172-489e-90a7-b39d343e14c0" - ] } } }, - "()_[('kwargs', {'session_id': '', 'query': 'when was the nba created', 'vector_db_ids': ['test-vector-db-']}), ('tool_name', 'knowledge_search')]": { + "[[], {\"kwargs\": {\"query\": \"current CEO of Meta\", \"session_id\": \"\"}, \"tool_name\": \"web_search\"}]": { "type": "value", "value": { - "content": [ - { - "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", - "type": "text" - }, - { - "text": "Result 1:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", - "type": "text" - }, - { - "text": "Result 2:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "Result 3:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", - "type": "text" - }, - { - "text": "END of knowledge_search tool results.\n", - "type": "text" + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": 
\"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\u2018Boz\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\u2019s finance and facilities team to keep track of the company\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\u2019s AR/VR organization, Reality Labs. Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) - Investopedia\", \"url\": \"https://www.investopedia.com/terms/m/mark-zuckerberg.asp\", \"content\": \"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg is a self-taught computer programmer and co-founder, chair, and chief executive officer of Meta (META), formerly known as Facebook. 
Mark Zuckerberg is a self-taught computer programmer and the co-founder, chair, and CEO of Meta (formerly Facebook). In April 2018, Zuckerberg testified on Capitol Hill about Facebook's use of users' information, including the sharing of 87 million users' information to Cambridge Analytica. Technically, Mark Zuckerberg makes a salary of $1 a year at Facebook. Booker Join With Facebook Founder and CEO Mark Zuckerberg to Advance a National Model for Improving Public Schools.\\\"\", \"score\": 0.74697095, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\u201cloved\\u201d an image on Facebook known as \\\"Challah Horse\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. ### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[[], {\"kwargs\": {\"query\": \"when was the nba created\", \"session_id\": \"\", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}]": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "nba_wiki", + "perplexity_wiki", + "perplexity_wiki" + ] + } + } + } + }, + "[]_{\"kwargs\": {\"code\": \"def is_prime(n):\\n if n <= 1:\\n return False\\n if n <= 3:\\n return True\\n if n % 2 == 0 or n % 3 == 0:\\n return False\\n i = 5\\n while i * i <= n:\\n if n % i == 0 or n % (i + 2) == 0:\\n return False\\n i += 6\\n 
return True\\n\\ndef get_nth_prime(n):\\n count = 0\\n num = 2\\n while True:\\n if is_prime(num):\\n count += 1\\n if count == n:\\n return num\\n num += 1\\n\\nprint(get_nth_prime(100))\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"code\": \"import pandas as pd\\n# Load data\\ndf = pd.read_csv(\\\"\")\\n# Rows\\nprint(\\\"Number of rows and columns in the data:\\\", df.shape)\\n# Columns\\nprint(\\\"Columns of the data are:\\\", len(df.columns))\\n# Column names\\nprint(\\\"Columns of the data are:\\\", df.columns)\\n# Column dtypes\\nprint(\\\"Datatype of the columns are:\\\", df.dtypes)\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"code\": \"import pandas as pd\\ndf = pd.read_csv(\\\"\")\\nprint(df.head())\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert 'date' column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"code\": \"import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load data\\ndf = pd.read_csv('inflation.csv')\\n\\n# Convert date column to datetime\\ndf['date'] = pd.to_datetime(df['date'])\\n\\n# Group by year and calculate average inflation\\naverage_inflation = df.groupby(df['date'].dt.year)['inflation'].mean()\\n\\n# Plot the time series\\nplt.figure(figsize=(10,6))\\nplt.plot(average_inflation.index, 
average_inflation.values, marker='o')\\nplt.title('Average Yearly Inflation')\\nplt.xlabel('Year')\\nplt.ylabel('Average Inflation')\\nplt.grid(True)\\nplt.show()\", \"session_id\": \"\"}, \"tool_name\": \"code_interpreter\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "completed\n[stderr]\nTraceback (most recent call last):\n line 5, in \n from bwrap.core import main\nModuleNotFoundError: No module named 'bwrap.core'\n[/stderr]", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"query\": \"How to use LoRA in Torchtune\", \"session_id\": \"\", \"vector_db_ids\": [\"vector_db_\"]}, \"tool_name\": \"knowledge_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:af027\nContent: .. _lora_finetune_label:\n\n============================\nFine-Tuning Llama2 with LoRA\n============================\n\nThis guide will teach you about `LoRA `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW ` alone will not handle the definition of which parameters are trainable.\n See :ref:`below` for how to do this.\n\nLet's inspect each of these models a bit more closely.\n\n.. 
code-block:: bash\n\n # Print the first layer's self-attention in the usual Llama2 model\n >>> print(base_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (output_proj): Linear(in_features=4096, out_features=4096, bias=False)\n (pos_embeddings): RotaryPositionalEmbeddings()\n )\n\n # Print the same for Llama2 with LoRA weights\n >>> print(lora_model.layers[0].attn)\n MultiHeadAttention(\n (q_proj): LoRALinear(\n (dropout): Dropout(p=0.0, inplace=False)\n \n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:af027\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:af027\nContent: from our Llama2\nmodel without any wrappers or custom checkpoint conversion logic.\n\n.. code-block:: python\n\n # Assuming that base_model already has the pretrained Llama2 weights,\n # this will directly load them into your LoRA model without any conversion necessary.\n lora_model.load_state_dict(base_model.state_dict(), strict=False)\n\n.. note::\n Whenever loading weights with :code:`strict=False`, you should verify that any missing or extra keys in\n the loaded :code:`state_dict` are as expected. torchtune's LoRA recipes do this by default via\n :func:`validate_missing_and_unexpected_for_lora() `.\n\nOnce we've loaded the base model weights, we also want to set only LoRA parameters to trainable.\n\n.. _setting_trainable_params:\n\n.. 
code-block:: python\n\n from torchtune.modules.peft.peft_utils import get_adapter_params, set_trainable_params\n\n # Fetch all params from the model that are associated with LoRA.\n lora_params = get_adapter_params(lora_model)\n\n # Set requires_grad=True on lora_params, and requires_grad=False on all others.\n set_trainable_params(lora_model, lora_params)\n\n # Print the total number of parameters\n total_params = sum([p.numel() for p in lora_model.parameters()])\n trainable_params = sum([p.numel() for p in lora_model.parameters() if p.requires_grad])\n print(\n f\"\"\"\n {total_params} total params,\n {trainable_params}\" trainable params,\n {(100.0 * trainable_params / total_params):.2f}% of all params are trainable.\n \"\"\"\n )\n\n 6742609920 total params,\n 4194304 trainable params,\n 0.06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe \", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:num-1\nContent: 3 `_ is a new family of models released by Meta AI that improves upon the performance of the Llama2 family\nof models across a `range of different benchmarks `_.\nCurrently there are two different sizes of Meta Llama 3: 8B and 70B. In this tutorial we will focus on the 8B size model.\nThere are a few main changes between Llama2-7B and Llama3-8B models:\n\n- Llama3-8B uses `grouped-query attention `_ instead of the standard multi-head attention from Llama2-7B\n- Llama3-8B has a larger vocab size (128,256 instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:num-1\nContent: instead of 32,000 from Llama2 models)\n- Llama3-8B uses a different tokenizer than Llama2 models (`tiktoken `_ instead of `sentencepiece `_)\n- Llama3-8B uses a larger intermediate dimension in its MLP layers than Llama2-7B\n- Llama3-8B uses a higher base value to calculate theta in its `rotary positional embeddings `_\n\n|\n\nGetting access to Llama3-8B-Instruct\n------------------------------------\n\nFor this tutorial, we will be using the instruction-tuned version of Llama3-8B. First, let's download the model from Hugging Face. You will need to follow the instructions\non the `official Meta page `_ to gain access to the model.\nNext, make sure you grab your Hugging Face token from `here `_.\n\n\n.. code-block:: bash\n\n tune download meta-llama/Meta-Llama-3\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:num-0\nContent: :`download Llama3 Instruct weights `\n\n\nTemplate changes from Llama2 to Llama3\n--------------------------------------\n\nThe Llama2 chat model requires a specific template when prompting the pre-trained\nmodel. 
Since the chat model was pretrained with this prompt template, if you want to run\ninference on the model, you'll need to use the same template for optimal performance\non chat data. Otherwise, the model will just perform standard text completion, which\nmay or may not align with your intended use case.\n\nFrom the `official Llama2 prompt\ntemplate guide `_\nfor the Llama2 chat model, we can see that special tags are added:\n\n.. code-block:: text\n\n [INST] <>\n You are a helpful, respectful, and honest assistant.\n <>\n\n Hi! I am a human. [/INST] Hello there! Nice to meet you! I'm Meta AI, your friendly AI assistant \n\nLlama3 Instruct `overhauled `\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:num-0\nContent: 'm Meta AI, your friendly AI assistant<|eot_id|>\n\nThe tags are entirely different, and they are actually encoded differently than in\nLlama2. Let's walk through tokenizing an example with the Llama2 template and the\nLlama3 template to understand how.\n\n.. note::\n The Llama3 Base model uses a `different prompt template\n `_ than Llama3 Instruct\n because it has not yet been instruct tuned and the extra special tokens are untrained. If you\n are running inference on the Llama3 Base model without fine-tuning we recommend the base\n template for optimal performance. Generally, for instruct and chat data, we recommend using\n Llama3 Instruct with its prompt template. The rest of this tutorial assumes you are using\n Llama3 Instruct.\n\n.. _prompt_template_vs_special_tokens:\n\nTokenizing prompt templates & special tokens\n--------------------------------------------\n\nLet's say I have a sample of a single user-assistant turn accompanied with a system\nprompt:\n\n.. code-block:: python\n\n sample = [\n {\n \"role\": \"system\",\n \"\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:num-3\nContent: LoRA to Llama2 models\n------------------------------\n\nWith torchtune, we can easily apply LoRA to Llama2 with a variety of different configurations.\nLet's take a look at how to construct Llama2 models in torchtune with and without LoRA.\n\n.. code-block:: python\n\n from torchtune.models.llama2 import llama2_7b, lora_llama2_7b\n\n # Build Llama2 without any LoRA layers\n base_model = llama2_7b()\n\n # The default settings for lora_llama2_7b will match those for llama2_7b\n # We just need to define which layers we want LoRA applied to.\n # Within each self-attention, we can choose from [\"q_proj\", \"k_proj\", \"v_proj\", and \"output_proj\"].\n # We can also set apply_lora_to_mlp=True or apply_lora_to_output=True to apply LoRA to other linear\n # layers outside of the self-attention.\n lora_model = lora_llama2_7b(lora_attn_modules=[\"q_proj\", \"v_proj\"])\n\n.. 
note::\n\n Calling :func:`lora_llama_2\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "num-1", + "num-1", + "num-0", + "num-0", + "num-3" + ] + } + } + } + }, + "[]_{\"kwargs\": {\"query\": \"Perplexity company founding date\", \"session_id\": \"\", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "perplexity_wiki", + "perplexity_wiki", + "nba_wiki" + ] + } + } + } + }, + "[]_{\"kwargs\": {\"query\": \"Torchtune documentation\", \"session_id\": \"\", \"vector_db_ids\": [\"vector_db_\"]}, \"tool_name\": \"knowledge_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:61fc5\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. For any\ncustom local dataset we always need to specify ``source``, ``data_files``, and ``split`` for any dataset\nbuilder in torchtune. For :func:`~torchtune.datasets.chat_dataset`, we additionally need to specify\n``conversation_column`` and ``conversation_style``. Our data follows the ``\"sharegpt\"`` format, so\nwe can specify that here. Altogether, our :func:`~torchtune.datasets.chat_dataset` call should\nlook like so:\n\n.. code-block:: python\n\n from torchtune.datasets import chat_dataset\n from torchtune.models.llama3 import llama3_tokenizer\n\n tokenizer = llama3_tokenizer(\"/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\")\n ds = chat_dataset(\n tokenizer=tokenizer,\n source=\"json\",\n data_files=\"data/my_data.json\",\n split=\"train\",\n conversation_column=\"dialogue\",\n conversation_style=\"sharegpt\",\n )\n\n.. 
code-block:: yaml\n\n # In config\n tokenizer:\n _component_: torchtune.models.llama3.llama3_tokenizer\n path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model\n\n dataset:\n _component_: torchtune.datasets.chat_dataset\n source: json\n data_files: data/my_data.json\n split: train\n conversation_column: dialogue\n conversation_style: sharegpt\n\n.. note::\n You can pass in any keyword argument for `load_dataset `_ into all our\n Dataset classes and they will honor them. This is useful for common parameters\n such as specifying the data split with :code:`split` or configuration with\n :code:`name`\n\nIf you needed to add a prompt template, you would simply pass it into the tokenizer.\nSince we're fine-tuning Llama3, the tokenizer will handle all formatting for\nus and prompt templates are optional. Other models such as Mistral's :class:`~torchtune.models.mistral._tokenizer.MistralTokenizer`,\nuse a chat template by default (:class:`~torchtune.models.mistral.MistralChatTemplate`) to format\nall messages according to their `recommendations `_, a parameter-efficient finetuning technique,\nand show you how you can use torchtune to finetune a Llama2 model with LoRA.\nIf you already know what LoRA is and want to get straight to running\nyour own LoRA finetune in torchtune, you can jump to :ref:`LoRA finetuning recipe in torchtune`.\n\n.. grid:: 2\n\n .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn\n\n * What LoRA is and how it saves memory during finetuning\n * An overview of LoRA components in torchtune\n * How to run a LoRA finetune using torchtune\n * How to experiment with different LoRA configurations\n\n .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites\n\n * Be familiar with :ref:`torchtune`\n * Make sure to :ref:`install torchtune`\n * Make sure you have downloaded the :ref:`Llama2-7B model weights`\n\nWhat is LoRA?\n-------------\n\n`LoRA `_ is an adapter-based method for\nparameter-efficient finetuning that adds trainable low-rank decomposition matrices to different layers of a neural network,\nthen freezes the network's remaining parameters. LoRA is most commonly applied to\ntransformer models, in which case it is common to add the low-rank matrices\nto some of the linear projections in each transformer layer's self-attention.\n\n.. note::\n\n If you're unfamiliar, check out these references for the `definition of rank `_\n and discussion of `low-rank approximations `_.\n\nBy finetuning with LoRA (as opposed to finetuning all model parameters),\nyou can expect to see memory savings due to a substantial reduction in the\nnumber of parameters with gradients. When using an optimizer with momentum,\nlike `AdamW `.\n.. .. _glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "Result 4:\nDocument_id:af027\nContent: 06% of all params are trainable.\n\n.. note::\n If you are directly using the LoRA recipe (as detailed :ref:`here`), you need only pass the\n relevant checkpoint path. Loading model weights and setting trainable parameters will be taken care\n of in the recipe.\n\n\n.. _lora_recipe_label:\n\nLoRA finetuning recipe in torchtune\n-----------------------------------\n\nFinally, we can put it all together and finetune a model using torchtune's `LoRA recipe `_.\nMake sure that you have first downloaded the Llama2 weights and tokenizer by following :ref:`these instructions`.\nYou can then run the following command to perform a LoRA finetune of Llama2-7B with two GPUs (each having VRAM of at least 16GB):\n\n.. 
code-block:: bash\n\n tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora\n\n.. note::\n Make sure to point to the location of your Llama2 weights and tokenizer. This can be done\n either by adding :code:`checkpointer.checkpoint_files=[my_model_checkpoint_path] tokenizer_checkpoint=my_tokenizer_checkpoint_path`\n or by directly modifying the :code:`7B_lora.yaml` file. See our \"\":ref:`config_tutorial_label`\" recipe\n for more details on how you can easily clone and modify torchtune configs.\n\n.. note::\n You can modify the value of :code:`nproc_per_node` depending on (a) the number of GPUs you have available,\n and (b) the memory constraints of your hardware.\n\nThe preceding command will run a LoRA finetune with torchtune's factory settings, but we may want to experiment a bit.\nLet's take a closer look at some of the :code:`lora_finetune_distributed` config.\n\n.. code-block:: yaml\n\n # Model Arguments\n model:\n _component_: lora_llama2_7b\n lora_attn_modules: ['q_proj', 'v_proj']\n lora_rank: 8\n lora_alpha: 16\n ...\n\nWe see that the\n", + "type": "text" + }, + { + "text": "Result 5:\nDocument_id:d5787\nContent: etune\n:func:`torchtune.models.llama3.llama3_8b` with DoRA, you would use :func:`torchtune.models.llama3.lora_llama3_8b` with ``use_dora=True``:\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.use_dora=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n use_dora: True\n\nSince DoRA extends LoRA, the parameters for :ref:`customizing LoRA ` are identical. You can also quantize the base model weights like in :ref:`glossary_qlora` by using ``quantize=True`` to reap\neven more memory savings!\n\n.. code-block:: bash\n\n tune run lora_finetune_single_device --config llama3/8B_lora_single_device \\\n model.apply_lora_to_mlp=True \\\n model.lora_attn_modules=[\"q_proj\",\"k_proj\",\"v_proj\"] \\\n model.lora_rank=16 \\\n model.lora_alpha=32 \\\n model.use_dora=True \\\n model.quantize_base=True\n\n.. code-block:: yaml\n\n model:\n _component_: torchtune.models.lora_llama3_8b\n apply_lora_to_mlp: True\n lora_attn_modules: [\"q_proj\", \"k_proj\", \"v_proj\"]\n lora_rank: 16\n lora_alpha: 32\n use_dora: True\n quantize_base: True\n\n\n.. note::\n\n Under the hood, we've enabled DoRA by adding the :class:`~torchtune.modules.peft.DoRALinear` module, which we swap\n out for :class:`~torchtune.modules.peft.LoRALinear` when ``use_dora=True``.\n\n.. _glossary_distrib:\n\n\n.. TODO\n\n.. Distributed\n.. -----------\n\n.. .. _glossary_fsdp:\n\n.. Fully Sharded Data Parallel (FSDP)\n.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n.. All our ``_distributed`` recipes use `FSDP `.\n.. .. 
_glossary_fsdp2:\n\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "61fc5307-4b19-4b23-ab6b-4abbd9614d2c", + "af027703-518d-44e3-b7ab-ff5feb73b769", + "d57876d1-5073-4954-b100-b192d52d04fe", + "af027703-518d-44e3-b7ab-ff5feb73b769", + "d57876d1-5073-4954-b100-b192d52d04fe" + ] + } + } + } + }, + "[]_{\"kwargs\": {\"query\": \"current CEO of Meta\", \"session_id\": \"\"}, \"tool_name\": \"web_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": "{\"query\": \"current CEO of Meta\", \"top_k\": [{\"title\": \"Executives - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer Joel Kaplan, Chief Global Affairs Officer Susan Li, Chief Financial Officer Javier Olivan, Chief Operating Officer Chris Cox, Chief Product Officer Andrew \\u2018Boz\\u2019 Bosworth, Chief Technology Officer Jennifer Newstead, Chief Legal Officer Dave Wehner, Chief Strategy Officer Will Cathcart, Head of WhatsApp Naomi Gleit, Head of Product John Hegeman, Chief Revenue Officer Adam Mosseri, Head of Instagram Erin Egan, Chief Privacy Officer, Policy Michel Protti, Chief Privacy Officer, Product Alex Schultz, Chief Marketing Officer and VP of Analytics Tom Alison, Head of Facebook Nicola Mendelsohn, Head of Global Business Group Ahmad Al-Dahle, VP and Head of GenAI at Meta Joelle Pineau, Vice President of AI Research and Head of FAIR at Meta\", \"score\": 0.8190992, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer - Meta\", \"url\": \"https://about.meta.com/media-gallery/executives/mark-zuckerberg/\", \"content\": \"Mark Zuckerberg, Founder, Chairman and Chief Executive Officer | Meta Meta Quest Ray-Ban Meta Meta Horizon Meta AI Meta Verified Meta Pay Meta Horizon Workrooms Meta and you Learn about our community Shop Meta Meta Quest Meta Portal Meta Horizon Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004. In October 2021, Facebook rebranded to Meta to reflect all of its products and services across its family of apps and a focus on developing social experiences for the metaverse \\u2014 moving beyond 2D screens toward immersive experiences like augmented and virtual reality to help build the next evolution in social technology. Shop Ray-Ban Meta glassesRay-Ban StoriesPrivacy informationSupported countries \\u00a9 2025 Meta\", \"score\": 0.79099923, \"raw_content\": null}, {\"title\": \"Meet the Executive CSuite Team of Meta (Facebook) [2025]\", \"url\": \"https://digitaldefynd.com/IQ/meet-the-executive-csuite-team-of-meta-facebook/\", \"content\": \"Harvard University Executive Programs Free Harvard University Courses As a chief financial officer of Meta, Susan Li oversees the firm\\u2019s finance and facilities team to keep track of the company\\u2019s overall financial health. The chief operating officer of Meta, Javier Olivan, oversees the firm\\u2019s business team, infrastructure, and other products. Andrew Bosworth, called Boz, serves as chief technology officer at Meta and is responsible for leading the firm\\u2019s AR/VR organization, Reality Labs. 
Andrew has also served as engineering director to oversee events, mobile monetization, and feed ads and as VP of ads and business platforms to lead engineering, design, analytics, and product teams. Meta\\u2019s c-suite team comprises experienced and diverse executives, having extensive experience in technology, finance, legal, and all major industries.\", \"score\": 0.7602419, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) - Investopedia\", \"url\": \"https://www.investopedia.com/terms/m/mark-zuckerberg.asp\", \"content\": \"Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg: Founder and CEO of Meta (formerly Facebook) Mark Zuckerberg is a self-taught computer programmer and co-founder, chair, and chief executive officer of Meta (META), formerly known as Facebook. Mark Zuckerberg is a self-taught computer programmer and the co-founder, chair, and CEO of Meta (formerly Facebook). In April 2018, Zuckerberg testified on Capitol Hill about Facebook's use of users' information, including the sharing of 87 million users' information to Cambridge Analytica. Technically, Mark Zuckerberg makes a salary of $1 a year at Facebook. Booker Join With Facebook Founder and CEO Mark Zuckerberg to Advance a National Model for Improving Public Schools.\\\"\", \"score\": 0.74697095, \"raw_content\": null}, {\"title\": \"Mark Zuckerberg - Forbes\", \"url\": \"https://www.forbes.com/profile/mark-zuckerberg/\", \"content\": \"Meta CEO Mark Zuckerberg \\u201cloved\\u201d an image on Facebook known as \\\"Challah Horse\\\" that happens to be AI-generated, highlighting the amount of AI spam on the platform. ### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President Elect Meta has donated $1 million to President-elect Donald Trump\\u2019s inaugural fund, the company confirmed to various news outlets on Wednesday, a move that comes just weeks after its CEO Mark Zuckerberg met with Trump at his Mar-a-Lago residence in an apparent bid to mend years of strained ties. 
### Meta Donates $1 Million To Trump\\u2019s Inaugural Fund Weeks After Mark Zuckerberg Met President-Elect Read the full profile on Forbes: https://www.forbes.com/sites/kerryadolan/2023/09/26/mark-gets-meta-zuckerberg-talks-ai-and-that-musk-mma-fight-thats-never-going-to-happen/?sh=671046e73037\", \"score\": 0.6410185, \"raw_content\": null}]}", + "error_code": null, + "error_message": null, + "metadata": null + } + } + }, + "[]_{\"kwargs\": {\"query\": \"when was the nba created\", \"session_id\": \"\", \"vector_db_ids\": [\"test-vector-db-\"]}, \"tool_name\": \"knowledge_search\"}": { + "type": "value", + "value": { + "__module__": "llama_stack.apis.tools.tools", + "__pydantic__": "ToolInvocationResult", + "data": { + "content": [ + { + "text": "knowledge_search tool found 3 chunks:\nBEGIN of knowledge_search tool results.\n", + "type": "text" + }, + { + "text": "Result 1:\nDocument_id:nba_w\nContent: The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).\n", + "type": "text" + }, + { + "text": "Result 2:\nDocument_id:perpl\nContent: Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:\n\n Srinivas, the CEO, worked at OpenAI as an AI researcher.\n Konwinski was among the founding team at Databricks.\n Yarats, the CTO, was an AI research scientist at Meta.\n Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "Result 3:\nDocument_id:perpl\nContent: Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]\n", + "type": "text" + }, + { + "text": "END of knowledge_search tool results.\n", + "type": "text" + } + ], + "error_code": null, + "error_message": null, + "metadata": { + "document_ids": [ + "nba_wiki", + "perplexity_wiki", + "perplexity_wiki" + ] } - ], - "error_code": null, - "error_message": null, - "metadata": { - "document_ids": [ - "nba_wiki", - "perplexity_wiki", - "perplexity_wiki" - ] } } } diff --git a/tests/integration/fixtures/recorded_responses/invoke_tool.pickle b/tests/integration/fixtures/recorded_responses/invoke_tool.pickle deleted file mode 100644 index a03204511a05a9542d235aab184538a73de63b56..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 67524 zcmeHwOLHSxc3#WVk|q#i%}61K?C>K}tmz@xVuB>dVv!VCGV9e{qbhby7kjG5RV5$+ zB!DaenV8H3+3cCNr3iUtMvNj%*^801amWh)0I$8X7Y^_I2OJJBy^t5uLK}bIIrruR z09mZ+?&sFT)m?8m3P)E>vNAEwUjyYD$s*Lmot-r^@q%T9GM zjiNy#^oBm~Kk-|Q#P{NMe{t#gIp6hWdOGsY&wlyX8;t$){+nn2WH9iCUL#4p_EF6n z1xbw~n7_}@e)ACD?uW-w+e-n_`+hPWr04(keCdy5xUV-FouHjI8uIN|+fkSTtn&~5 z+poXz#*Y>R+=U%yVFtJh%g#a?jT%Sz{ppVu(jXo9y!Gx!etVn-$A03tGB4gZjtBg$ zpQfW^XJy4}MdP$K#H4HOXt*-;JAvo+yukpgv*Kq1tng6-5+8r)#YfJ+7`Ko7xaG&a zW#?Wr4m*Cl?A-2qK|J(A#|u0768K#wgTr~R+YQ=&?0h%!2hQ(%qk$Ll3EuS1AZmF7 zXTRIU^GSB+<8gwq9t8HbdqL=h?ErUXV|m8`hJ0@j9Ot9GH}Yewa@fmmyWNk2#JL@P zWJh`wN1bsy&2HEaJF$P_JR8?n*S2m&AIpbr=T?-QL~+`;j~)8$ei#j+-f8yacl|I7 zx`5%pKS@&G>)3lA_*lv8wmaUj?>zDQp)M0A{Wt}beqpF5!C>Ir_R@aai_>N2T?|H~ zeA4&QWPdbr4!md>V6FV1@T;B4ccXrYM|wV%--7VIf9!{2KLcXF;|-mMQIhyE*3^!F zKLpbDV*GSB4npT{FCXAhjCE~ilU;TmMS}p}9|r9{HWBYm(`V1U#38@5M?{(LKjMV&Yx}JzI&uzzb~NyS+rYg+ zg6Ze?7<{@lPJ$35z$nzsCyo3- zh2z2C6OgO?@P5h@_Nfp25WC}koSpE`m;n4(SD3`s`QF%1Qs;f|)V+nHE|=u5ccVD? 
[... remaining base85-encoded GIT binary patch data for the deleted invoke_tool.pickle omitted ...]