From d0854a48b2979cfa6b151dd18c7c3f5c3b795826 Mon Sep 17 00:00:00 2001 From: Aidan Do Date: Sun, 15 Dec 2024 17:53:22 +1100 Subject: [PATCH] . --- COMMANDS.md | 14 + build.yaml | 23 + hello.ipynb | 1528 +++++++++++++++++++++++++++++++++++++++++++++++++++ run.yaml | 80 +++ 4 files changed, 1645 insertions(+) create mode 100644 COMMANDS.md create mode 100644 build.yaml create mode 100644 hello.ipynb create mode 100644 run.yaml diff --git a/COMMANDS.md b/COMMANDS.md new file mode 100644 index 000000000..60d9fcb7f --- /dev/null +++ b/COMMANDS.md @@ -0,0 +1,14 @@ +```bash +source ~/miniconda3/bin/activate +conda create --prefix ./envs python=3.10 + +source ~/miniconda3/bin/activate +conda activate ./envs + +pip install -e . \ +&& llama stack build --config ./build.yaml --image-type conda \ +&& llama stack run ./run.yaml \ + --port 5001 + +pytest llama_stack/providers/tests/inference/test_text_inference.py -v -k groq --lf -s +``` \ No newline at end of file diff --git a/build.yaml b/build.yaml new file mode 100644 index 000000000..aa7833e4c --- /dev/null +++ b/build.yaml @@ -0,0 +1,23 @@ +version: '2' +name: groq +distribution_spec: + description: Use (an external) Groq server for running LLM inference + docker_image: null + providers: + inference: + - remote::groq + memory: + - inline::faiss + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - inline::localfs + scoring: + - inline::basic +image_type: conda diff --git a/hello.ipynb b/hello.ipynb new file mode 100644 index 000000000..ab13adc9d --- /dev/null +++ b/hello.ipynb @@ -0,0 +1,1528 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content=\"Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?\", role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from IPython.display import display\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=\"http://localhost:5001\",\n", + ")\n", + "\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Hello, world client!\"},\n", + " ],\n", + " # stream=True,\n", + ")\n", + "\n", + "display(response)\n", + "\n", + "# for chunk in response:\n", + " # print(chunk)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'inference': [ProviderInfo(provider_id='groq', provider_type='remote::groq')],\n", + " 'memory': [ProviderInfo(provider_id='faiss', provider_type='inline::faiss')],\n", + " 'safety': [ProviderInfo(provider_id='llama-guard', provider_type='inline::llama-guard')],\n", + " 'agents': [ProviderInfo(provider_id='meta-reference', provider_type='inline::meta-reference')],\n", + " 'telemetry': [ProviderInfo(provider_id='meta-reference', provider_type='inline::meta-reference')],\n", + " 'eval': [ProviderInfo(provider_id='meta-reference', provider_type='inline::meta-reference')],\n", + " 'datasetio': [ProviderInfo(provider_id='localfs', provider_type='inline::localfs')],\n", + " 'scoring': [ProviderInfo(provider_id='basic', provider_type='inline::basic')]}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.providers.list()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**What is ASGI?**\n", + "\n", + "ASGI (Asynchronous Server Gateway Interface) is a standard for building asynchronous web servers and frameworks in Python. It was designed to replace the traditional WSGI (Web Server Gateway Interface) standard, which is synchronous and not suitable for modern asynchronous web development.\n", + "\n", + "**Key Features of ASGI**\n", + "\n", + "1. **Asynchronous**: ASGI allows for asynchronous execution of web requests, which means that the server can handle multiple requests concurrently without blocking.\n", + "2. **Event-driven**: ASGI uses an event-driven approach, where the server waits for events (e.g., incoming requests) and then responds to them.\n", + "3. **Non-blocking**: ASGI allows the server to continue executing other tasks while waiting for an event, making it more efficient and scalable.\n", + "\n", + "**How ASGI Works in Python**\n", + "\n", + "Here's a high-level overview of how ASGI works in Python:\n", + "\n", + "1. **ASGI Application**: An ASGI application is a Python function that takes an `app` object as an argument. The `app` object represents the ASGI server and provides methods for handling requests and responses.\n", + "2. **Request Handling**: When a request is received, the ASGI server creates an `ASGIRequest` object, which contains information about the request, such as the URL, headers, and body.\n", + "3. **Middleware**: ASGI applications can use middleware functions to modify or extend the request and response objects. Middleware functions are executed before and after the ASGI application handles the request.\n", + "4. **ASGI Application Execution**: The ASGI application is executed, which may involve calling other functions or methods to handle the request.\n", + "5. **Response Generation**: The ASGI application generates a response, which is an `ASGIResponse` object that contains information about the response, such as the status code, headers, and body.\n", + "6. **Response Sending**: The ASGI server sends the response back to the client.\n", + "\n", + "**ASGI Frameworks in Python**\n", + "\n", + "Some popular ASGI frameworks in Python include:\n", + "\n", + "1. **Sanic**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications.\n", + "2. **Aiohttp**: A popular asynchronous HTTP client and server library that provides a simple and efficient way to build web applications.\n", + "3. **FastAPI**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications with high performance and scalability.\n", + "\n", + "**Example Code**\n", + "\n", + "Here's an example of a simple ASGI application using the Sanic framework:\n", + "```python\n", + "from sanic import Sanic\n", + "\n", + "app = Sanic()\n", + "\n", + "@app.route('/')\n", + "async def index(request):\n", + " return 'Hello, World!'\n", + "\n", + "if __name__ == '__main__':\n", + " app.run()\n", + "```\n", + "This code defines a simple ASGI application that responds to GET requests to the root URL ('/') with the string 'Hello, World!'.\n", + "\n", + "I hope this helps you understand how ASGI works in Python! Let me know if you have any questions or need further clarification." + ] + } + ], + "source": [ + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Explain to me how ASGI in python works\"},\n", + " ],\n", + " stream=True,\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk.event.delta, end='')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**What is ASGI?**\n", + "\n", + "ASGI (Asynchronous Server Gateway Interface) is a standard for building asynchronous web servers and frameworks in Python. It was designed to replace the traditional WSGI (Web Server Gateway Interface) standard, which is synchronous and not suitable for modern asynchronous web development.\n", + "\n", + "**Key Features of ASGI**\n", + "\n", + "1. **Asynchronous**: ASGI allows for asynchronous execution of web requests, which means that the server can handle multiple requests concurrently without blocking.\n", + "2. **Event-driven**: ASGI uses an event-driven approach, where the server waits for events (e.g., incoming requests) and then responds to them.\n", + "3. **Non-blocking**: ASGI allows the server to continue executing other tasks while waiting for an event, making it more efficient and scalable.\n", + "\n", + "**How ASGI Works in Python**\n", + "\n", + "Here's a high-level overview of how ASGI works in Python:\n", + "\n", + "1. **ASGI Application**: An ASGI application is a Python function that takes an `app` object as an argument. The `app` object represents the ASGI server and provides methods for handling requests and responses.\n", + "2. **Request Handling**: When a request is received, the ASGI server creates an `ASGIRequest` object, which contains information about the request, such as the URL, headers, and body.\n", + "3. **Middleware**: ASGI applications can use middleware functions to modify or extend the request and response objects. Middleware functions are executed before and after the ASGI application handles the request.\n", + "4. **ASGI Application Execution**: The ASGI application is executed, which may involve calling other functions or methods to handle the request.\n", + "5. **Response Generation**: The ASGI application generates a response, which is an `ASGIResponse` object that contains information about the response, such as the status code, headers, and body.\n", + "6. **Response Sending**: The ASGI server sends the response back to the client.\n", + "\n", + "**ASGI Frameworks in Python**\n", + "\n", + "Some popular ASGI frameworks in Python include:\n", + "\n", + "1. **Sanic**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications.\n", + "2. **Aiohttp**: A popular asynchronous HTTP client and server library that provides a simple and efficient way to build web applications.\n", + "3. **FastAPI**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications with high performance and scalability.\n", + "\n", + "**Example Code**\n", + "\n", + "Here's an example of a simple ASGI application using the Sanic framework:\n", + "```python\n", + "from sanic import Sanic\n", + "\n", + "app = Sanic()\n", + "\n", + "@app.route('/')\n", + "async def index(request):\n", + " return 'Hello, World!'\n", + "\n", + "if __name__ == '__main__':\n", + " app.run()\n", + "```\n", + "This code defines a simple ASGI application that responds to GET requests to the root URL ('/') with the string 'Hello, World!'.\n", + "\n", + "I hope this helps you understand how ASGI works in Python! Let me know if you have any questions or need further clarification." + ] + } + ], + "source": [ + "from llama_models.datatypes import SamplingParams\n", + "\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Explain to me how ASGI in python works\"},\n", + " ],\n", + " stream=True,\n", + " sampling_params=SamplingParams(\n", + " temperature=0,\n", + " ),\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk.event.delta, end='')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**What is ASGI?**\n", + "\n", + "ASGI (Asynchronous Server Gateway Interface) is a standard for building asynchronous web servers and frameworks in Python. It was designed to replace the traditional WSGI (Web Server Gateway Interface) standard, which is synchronous and not suitable for modern asynchronous web development.\n", + "\n", + "**Key Features of ASGI**\n", + "\n", + "1. **Asynchronous**: ASGI allows for asynchronous execution of web requests, which means that the server can handle multiple requests concurrently without blocking.\n", + "2. **Event-driven**: ASGI uses an event-driven approach, where the server waits for events (e.g., incoming requests) and then responds to them.\n", + "3. **Non-blocking**: ASGI allows the server to continue executing other tasks while waiting for an event, making it more efficient and scalable.\n", + "\n", + "**How ASGI Works in Python**\n", + "\n", + "Here's a high-level overview of how ASGI works in Python:\n", + "\n", + "1. **ASGI Application**: An ASGI application is a Python function that takes an `app` object as an argument. The `app` object represents the ASGI server and provides methods for handling requests and responses.\n", + "2. **Request Handling**: When a request is received, the ASGI server creates an `ASGIRequest` object, which contains information about the request, such as the URL, headers, and body.\n", + "3. **Middleware**: ASGI applications can use middleware functions to modify or extend the request and response objects. Middleware functions are executed before and after the ASGI application handles the request.\n", + "4. **ASGI Application Execution**: The ASGI application is executed, which may involve calling other functions or methods to handle the request.\n", + "5. **Response Generation**: The ASGI application generates a response, which is an `ASGIResponse` object that contains information about the response, such as the status code, headers, and body.\n", + "6. **Response Sending**: The ASGI server sends the response back to the client.\n", + "\n", + "**ASGI Frameworks in Python**\n", + "\n", + "Some popular ASGI frameworks in Python include:\n", + "\n", + "1. **Sanic**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications.\n", + "2. **Aiohttp**: A popular asynchronous HTTP client and server library that provides a simple and efficient way to build web applications.\n", + "3. **FastAPI**: A modern, asynchronous web framework that provides a simple and efficient way to build web applications with high performance and scalability.\n", + "\n", + "**Example Code**\n", + "\n", + "Here's an example of a simple ASGI application using the Sanic framework:\n", + "```python\n", + "from sanic import Sanic\n", + "\n", + "app = Sanic()\n", + "\n", + "@app.route('/')\n", + "async def index(request):\n", + " return 'Hello, World!'\n", + "\n", + "if __name__ == '__main__':\n", + " app.run()\n", + "```\n", + "This code defines a simple ASGI application that responds to GET requests to the root URL ('/') with the string 'Hello, World!'.\n", + "\n", + "I hope this helps you understand how ASGI works in Python! Let me know if you have any questions or need further clarification." + ] + } + ], + "source": [ + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Explain to me how ASGI in python works\"},\n", + " ],\n", + " stream=True,\n", + " sampling_params=SamplingParams(\n", + " top_p=1\n", + " ),\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk.event.delta, end='')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**What is ASGI?**\n", + "\n", + "ASGI (Asynchronous Server Gateway Interface) is a standard for building asynchronous web servers and frameworks in Python. It was designed to replace the traditional WSGI (Web Server Gateway Interface) standard, which is synchronous" + ] + } + ], + "source": [ + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Explain to me how ASGI in python works\"},\n", + " ],\n", + " stream=True,\n", + " sampling_params=SamplingParams(\n", + " max_tokens=50\n", + " ),\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk.event.delta, end='')" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponseStreamChunkEvent(delta='', event_type='start', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='**', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='What', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' is', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' AS', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='GI', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='?', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='**\\n\\n', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='AS', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='GI', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' (', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='As', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='ynchronous', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Server', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Gateway', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Interface', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=')', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' is', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' a', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' standard', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' for', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' building', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' asynchronous', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' web', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' servers', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' and', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' frameworks', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' in', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Python', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='.', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' It', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' was', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' designed', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' to', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' replace', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' the', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' traditional', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' W', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='SG', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='I', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' (', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='Web', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Server', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Gateway', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' Interface', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=')', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' standard', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=',', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' which', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' is', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta=' synchronous', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='', event_type='progress', logprobs=None, stop_reason=None)\n", + "ChatCompletionResponseStreamChunkEvent(delta='', event_type='complete', logprobs=None, stop_reason='end_of_message')\n" + ] + } + ], + "source": [ + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Explain to me how ASGI in python works\"},\n", + " ],\n", + " stream=True,\n", + " sampling_params=SamplingParams(\n", + " max_tokens=50\n", + " ),\n", + " logprobs={\n", + " \"top_k\": 10,\n", + " },\n", + ")\n", + "\n", + "# for chunk in response:\n", + "# print(chunk.event.delta, end='')\n", + "for chunk in response:\n", + " print(chunk.event)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content='', role='assistant', stop_reason='end_of_message', tool_calls=[ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD'}, call_id='call_grvk', tool_name='get_flight_info')]), logprobs=None)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Regular non-streamed tool call\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"When's the next flight from Adelaide to Sydney?\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "# for chunk in response:\n", + "# print(chunk.event.delta, end='')\n", + "response" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "ChatCompletion(\n", + " id='chatcmpl-7f14606b-d091-4b12-9d13-e95831f04301',\n", + " choices=[\n", + " Choice(\n", + " finish_reason='tool_calls',\n", + " index=0,\n", + " logprobs=None,\n", + " message=ChatCompletionMessage(\n", + " content=None,\n", + " role='assistant',\n", + " function_call=None,\n", + " tool_calls=[ChatCompletionMessageToolCall(id='call_4qg1', function=Function(arguments='{\"origin\":\"ADL\",\"destination\":\"SYD\"}', name='get_flight_info'), type='function')]\n", + " )\n", + " )\n", + " ],\n", + " created=1733917567,\n", + " model='llama3-8b-8192',\n", + " object='chat.completion',\n", + " system_fingerprint='fp_a97cfe35ae',\n", + " usage=CompletionUsage(completion_tokens=76, prompt_tokens=972, total_tokens=1048, completion_time=0.063333333, prompt_time=0.11611327, queue_time=0.0061331509999999895, total_time=0.179446603),\n", + " x_groq={'id': 'req_01jetrmtcmfs89v7qyw8fdx1v0'}\n", + ")\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Regular non-streamed tool call\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"When's the next flight from Adelaide to Sydney?\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "# for chunk in response:\n", + "# print(chunk.event.delta, end='')\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD'}, call_id='call_b83m', tool_name='get_flight_info'), parse_status='in_progress')\n", + "ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'date_from': '2024-01-01', 'num_nights': 3.0, 'post_code': '2000', 'smoking_friendly': True}, call_id='call_3jcd', tool_name='get_hotel_info'), parse_status='in_progress')\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# Streamed tool call\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"When's the next flight from Adelaide to Sydney? Also find a hotel in the Sydney area for 3 nights starting on the 1st of January 2024. Should be smoking friendly.\"},\n", + " ],\n", + " stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the area to search for hotels. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk.event.delta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "ChatCompletionChunk(\n", + " id='chatcmpl-189b0530-6bcb-4089-bad7-65f73104b182', \n", + " choices=[\n", + " Choice(\n", + " delta=ChoiceDelta(content=None, function_call=None, role='assistant', tool_calls=None), \n", + " finish_reason=None, \n", + " index=0, \n", + " logprobs=None\n", + " )\n", + " ], \n", + " created=1733955177, \n", + " model='llama3-8b-8192', \n", + " object='chat.completion.chunk', \n", + " system_fingerprint='fp_a97cfe35ae', \n", + " usage=None, \n", + " x_groq=XGroq(id='req_01jevwgjx2f3maj4rbzaaexagx', usage=None, error=None))\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content='', role='assistant', stop_reason='end_of_message', tool_calls=[ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD'}, call_id='call_rg3e', tool_name='get_flight_info'), ToolCall(arguments={'date_from': '2024-01-01', 'num_nights': 3.0, 'post_code': '2000', 'smoking_friendly': True}, call_id='call_jhgw', tool_name='get_hotel_info')]), logprobs=None)\n", + "ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD'}, call_id='call_rg3e', tool_name='get_flight_info')\n", + "ToolCall(arguments={'date_from': '2024-01-01', 'num_nights': 3.0, 'post_code': '2000', 'smoking_friendly': True}, call_id='call_jhgw', tool_name='get_hotel_info')\n" + ] + } + ], + "source": [ + "# Multiple tool calls in one chat completion\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"When's the next flight from Adelaide to Sydney? Also find a hotel in the Sydney area for 3 nights starting on the 1st of January 2024. Should be smoking friendly.\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the area to search for hotels. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content='', role='assistant', stop_reason='end_of_message', tool_calls=[ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD', 'passengers': '[{\"name\": \"John\", \"age\": 35, \"class\": \"Economy\"}, {\"name\": \"Jane\", \"age\": 32, \"class\": \"Economy\"}, {\"name\": \"Timmy\", \"age\": 8, \"class\": \"Economy\"}]', 'wifi': True, 'meal': True}, call_id='call_0ymk', tool_name='get_flight_info'), ToolCall(arguments={'address': {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}, 'date_from': '2024-01-01', 'num_nights': 3.0, 'smoking_friendly': True}, call_id='call_139y', tool_name='get_hotel_info')]), logprobs=None)\n", + "ToolCall(arguments={'origin': 'ADL', 'destination': 'SYD', 'passengers': '[{\"name\": \"John\", \"age\": 35, \"class\": \"Economy\"}, {\"name\": \"Jane\", \"age\": 32, \"class\": \"Economy\"}, {\"name\": \"Timmy\", \"age\": 8, \"class\": \"Economy\"}]', 'wifi': True, 'meal': True}, call_id='call_0ymk', tool_name='get_flight_info')\n", + "ToolCall(arguments={'address': {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}, 'date_from': '2024-01-01', 'num_nights': 3.0, 'smoking_friendly': True}, call_id='call_139y', tool_name='get_hotel_info')\n" + ] + } + ], + "source": [ + "# Tool call with object and array parameters\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant helping users book flights and hotels.\"},\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " When's the next flight from Adelaide to Sydney? (I only want direct flights and the flight should have wifi and a meal.)\n", + " The flight should fit 2 adults and 1 child. Economy class.\n", + " Also find a hotel in the Sydney area for 3 nights starting on the 1st of January 2024. (Should be smoking friendly.)\n", + " \"\"\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " \"passengers\": {\n", + " \"param_type\": \"array\",\n", + " \"description\": \"The passengers\",\n", + " \"items\": {\n", + " \"param_type\": \"object\",\n", + " \"properties\": {\n", + " \"age_group\": {\"param_type\": \"string\", \"description\": \"The age group of the passenger. E.g., 'adult', 'child', 'infant'\", \"required\": True},\n", + " \"flight_class\": {\"param_type\": \"string\", \"description\": \"The flight class of the passenger. E.g., 'economy', 'business', 'first_class'\", \"required\": True},\n", + " \"number\": {\"param_type\": \"integer\", \"description\": \"The number of passengers of this age group and flight class. E.g., 1, 2\", \"required\": True},\n", + " },\n", + " \"required\": True,\n", + " },\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"address\": {\n", + " \"param_type\": \"object\",\n", + " \"description\": \"The address of the hotel. E.g., {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}\",\n", + " \"properties\": {\n", + " \"street_address\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The street address of the hotel. E.g., '123 Main St'\",\n", + " \"required\": True,\n", + " },\n", + " \"city\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The city of the hotel. E.g., 'Sydney'\",\n", + " \"required\": True,\n", + " },\n", + " \"state\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The state of the hotel. E.g., 'NSW'\",\n", + " \"required\": True,\n", + " },\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the hotel. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " },\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "ename": "BadRequestError", + "evalue": "Error code: 400 - {'detail': 'Invalid value: (\\'Groq failed to call a tool\\', {\\'message\\': \"Failed to call a function. Please adjust your prompt. See \\'failed_generation\\' for more details.\", \\'type\\': \\'invalid_request_error\\', \\'code\\': \\'tool_use_failed\\', \\'failed_generation\\': \\'{origin=\"ADL\",destination=\"SYD\"}\\'})'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[77], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Smaller model tool call test\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mLlama3.2-3B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrole\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhen\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43ms the next flight from Adelaide to Sydney?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# stream=True,\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_name\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget_flight_info\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGet the flight information for a given origin and destination\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparameters\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43morigin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe origin airport code. E.g., AU\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdestination\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe destination airport code. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mLAX\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# for chunk in response:\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;66;03m# print(chunk.event.delta, end='')\u001b[39;00m\n\u001b[1;32m 30\u001b[0m response\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", + "\u001b[0;31mBadRequestError\u001b[0m: Error code: 400 - {'detail': 'Invalid value: (\\'Groq failed to call a tool\\', {\\'message\\': \"Failed to call a function. Please adjust your prompt. See \\'failed_generation\\' for more details.\", \\'type\\': \\'invalid_request_error\\', \\'code\\': \\'tool_use_failed\\', \\'failed_generation\\': \\'{origin=\"ADL\",destination=\"SYD\"}\\'})'}" + ] + } + ], + "source": [ + "# Smaller model tool call test\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.2-3B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"When's the next flight from Adelaide to Sydney?\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"get_flight_info\",\n", + " \"description\": \"Get the flight information for a given origin and destination\",\n", + " \"parameters\": {\n", + " \"origin\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The origin airport code. E.g., AU\",\n", + " \"required\": True,\n", + " },\n", + " \"destination\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The destination airport code. E.g., 'LAX'\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "# for chunk in response:\n", + "# print(chunk.event.delta, end='')\n", + "response\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Server error we get:\n", + "```\n", + "groq.BadRequestError: Error code: 400 - {'error': {'message': \"Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.\", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '{origin=\"ADL\",destination=\"SYD\"}'}}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content='', role='assistant', stop_reason='end_of_message', tool_calls=[ToolCall(arguments={'a': 2.5, 'b': 3.5}, call_id='call_k71c', tool_name='multiply')]), logprobs=None)\n", + "ToolCall(arguments={'a': 2.5, 'b': 3.5}, call_id='call_k71c', tool_name='multiply')\n" + ] + } + ], + "source": [ + "# Test for floats\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Multiply 2.5 and 3.5\n", + " \"\"\"},\n", + " ],\n", + " # stream=True,\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"multiply\",\n", + " \"description\": \"Multiply two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to multiply. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to multiply. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"address\": {\n", + " \"param_type\": \"object\",\n", + " \"description\": \"The address of the hotel. E.g., {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}\",\n", + " \"properties\": {\n", + " \"street_address\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The street address of the hotel. E.g., '123 Main St'\",\n", + " \"required\": True,\n", + " },\n", + " \"city\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The city of the hotel. E.g., 'Sydney'\",\n", + " \"required\": True,\n", + " },\n", + " \"state\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The state of the hotel. E.g., 'NSW'\",\n", + " \"required\": True,\n", + " },\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the hotel. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " },\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content='', role='assistant', stop_reason='end_of_message', tool_calls=[ToolCall(arguments={'query': 'ASGI in python explanation'}, call_id='call_bqcb', tool_name='brave_search'), ToolCall(arguments={'query': 'ASGI python architecture'}, call_id='call_2k5t', tool_name='brave_search'), ToolCall(arguments={'query': 'ASGI python example'}, call_id='call_7d2g', tool_name='brave_search')]), logprobs=None)\n", + "ToolCall(arguments={'query': 'ASGI in python explanation'}, call_id='call_bqcb', tool_name='brave_search')\n", + "ToolCall(arguments={'query': 'ASGI python architecture'}, call_id='call_2k5t', tool_name='brave_search')\n", + "ToolCall(arguments={'query': 'ASGI python example'}, call_id='call_7d2g', tool_name='brave_search')\n" + ] + } + ], + "source": [ + "# Test tool choice\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Explain to me how ASGI in python works\n", + " \"\"\"},\n", + " ],\n", + " # stream=True,\n", + " tool_choice=\"required\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"multiply\",\n", + " \"description\": \"Multiply two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to multiply. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to multiply. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"address\": {\n", + " \"param_type\": \"object\",\n", + " \"description\": \"The address of the hotel. E.g., {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}\",\n", + " \"properties\": {\n", + " \"street_address\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The street address of the hotel. E.g., '123 Main St'\",\n", + " \"required\": True,\n", + " },\n", + " \"city\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The city of the hotel. E.g., 'Sydney'\",\n", + " \"required\": True,\n", + " },\n", + " \"state\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The state of the hotel. E.g., 'NSW'\",\n", + " \"required\": True,\n", + " },\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the hotel. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " },\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponse(completion_message=CompletionMessage(content=\"ASGI (Asynchronous Server Gateway Interface) is a standard for building asynchronous web servers and applications in Python. It was created as a replacement for the older WSGI (Web Server Gateway Interface) standard, which was designed for synchronous web development.\\n\\nHere's a high-level overview of how ASGI works:\\n\\n**Key Components**\\n\\n1. **Server**: The ASGI server is responsible for handling incoming requests and sending responses back to the client. It acts as a gateway between the client and the application.\\n2. **Application**: The ASGI application is a Python callable that handles incoming requests and returns responses. It's the core logic of the web application.\\n3. **Protocol**: The ASGI protocol defines the communication between the server and the application. It's responsible for sending and receiving messages, such as requests and responses.\\n\\n**ASGI Request/Response Cycle**\\n\\nHere's a step-by-step explanation of the ASGI request/response cycle:\\n\\n1. **Client Request**: A client (e.g., a web browser) sends an HTTP request to the ASGI server.\\n2. **Server Receives Request**: The ASGI server receives the request and passes it to the application.\\n3. **Application Processes Request**: The ASGI application processes the request and returns a response.\\n4. **Server Sends Response**: The ASGI server sends the response back to the client.\\n5. **Client Receives Response**: The client receives the response and displays it to the user.\\n\\n**ASGI Protocol**\\n\\nThe ASGI protocol defines the following messages:\\n\\n* **ASGI_FRAME**: A single frame of data, which can be a request or response.\\n* **ASGI_MESSAGE**: A message that contains multiple frames.\\n* **ASGI_CLOSE**: A message that indicates the end of the request/response cycle.\\n\\n**ASGI Server Implementations**\\n\\nSome popular ASGI server implementations include:\\n\\n* **Uvicorn**: A fast and lightweight ASGI server.\\n* **Hypercorn**: A high-performance ASGI server.\\n* **Daphne**: A ASGI server that supports WebSockets and other real-time protocols.\\n\\n**ASGI Frameworks**\\n\\nSome popular ASGI frameworks include:\\n\\n* **Starlette**: A lightweight ASGI framework that provides a simple and intuitive API.\\n* **FastAPI**: A modern, fast (high-performance), web framework for building APIs with Python 3.7+ based on standard Python type hints.\\n* **Asgiref**: A reference implementation of the ASGI protocol.\\n\\nIn summary, ASGI is a standard for building asynchronous web servers and applications in Python. It defines a protocol for communication between the server and the application, and provides a framework for building web applications that can handle multiple requests concurrently.\", role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + ] + } + ], + "source": [ + "# Test tool choice\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Explain to me how ASGI in python works\n", + " \"\"\"},\n", + " ],\n", + " # stream=True,\n", + " tool_choice=\"auto\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"multiply\",\n", + " \"description\": \"Multiply two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to multiply. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to multiply. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"address\": {\n", + " \"param_type\": \"object\",\n", + " \"description\": \"The address of the hotel. E.g., {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}\",\n", + " \"properties\": {\n", + " \"street_address\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The street address of the hotel. E.g., '123 Main St'\",\n", + " \"required\": True,\n", + " },\n", + " \"city\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The city of the hotel. E.g., 'Sydney'\",\n", + " \"required\": True,\n", + " },\n", + " \"state\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The state of the hotel. E.g., 'NSW'\",\n", + " \"required\": True,\n", + " },\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the hotel. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " },\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "ename": "BadRequestError", + "evalue": "Error code: 400 - {'detail': 'Invalid value: groq only supports json tool_prompt_format'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[68], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Test tool choice\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mLlama3.1-8B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrole\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m Explain to me how ASGI in python works\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# stream=True,\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfunction_tag\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_name\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmultiply\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMultiply two numbers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparameters\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ma\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfloat\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe first number to multiply. E.g., 2.5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfloat\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe second number to multiply. E.g., 3.5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_name\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget_hotel_info\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGet the hotel information for a given destination\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparameters\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maddress\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mobject\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe address of the hotel. E.g., \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstreet_address\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m123 Main St\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcity\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSydney\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstate\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNSW\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpost_code\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2000\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m}\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstreet_address\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe street address of the hotel. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m123 Main St\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcity\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe city of the hotel. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSydney\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe state of the hotel. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNSW\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 51\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost_code\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe post code of the hotel. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2000\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 59\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnum_nights\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minteger\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe number of nights to stay. E.g., 3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdate_from\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstring\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 67\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mThe date to start the stay formatted as YYYY-MM-DD. E.g., \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2024-01-01\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 68\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 69\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 70\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msmoking_friendly\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparam_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mboolean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhether the hotel is smoking friendly. E.g., True\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrequired\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 74\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 75\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 78\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28mprint\u001b[39m(response)\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tool_call \u001b[38;5;129;01min\u001b[39;00m response\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mtool_calls:\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/llama-stack/envs/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", + "\u001b[0;31mBadRequestError\u001b[0m: Error code: 400 - {'detail': 'Invalid value: groq only supports json tool_prompt_format'}" + ] + } + ], + "source": [ + "# Test tool choice\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Explain to me how ASGI in python works\n", + " \"\"\"},\n", + " ],\n", + " # stream=True,\n", + " tool_choice=\"required\",\n", + " tool_prompt_format=\"function_tag\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"multiply\",\n", + " \"description\": \"Multiply two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to multiply. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to multiply. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"get_hotel_info\",\n", + " \"description\": \"Get the hotel information for a given destination\",\n", + " \"parameters\": {\n", + " \"address\": {\n", + " \"param_type\": \"object\",\n", + " \"description\": \"The address of the hotel. E.g., {'street_address': '123 Main St', 'city': 'Sydney', 'state': 'NSW', 'post_code': '2000'}\",\n", + " \"properties\": {\n", + " \"street_address\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The street address of the hotel. E.g., '123 Main St'\",\n", + " \"required\": True,\n", + " },\n", + " \"city\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The city of the hotel. E.g., 'Sydney'\",\n", + " \"required\": True,\n", + " },\n", + " \"state\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The state of the hotel. E.g., 'NSW'\",\n", + " \"required\": True,\n", + " },\n", + " \"post_code\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The post code of the hotel. E.g., '2000'\",\n", + " \"required\": True,\n", + " },\n", + " },\n", + " \"required\": True,\n", + " },\n", + " \"num_nights\": {\n", + " \"param_type\": \"integer\",\n", + " \"description\": \"The number of nights to stay. E.g., 3\",\n", + " \"required\": True,\n", + " },\n", + " \"date_from\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The date to start the stay formatted as YYYY-MM-DD. E.g., '2024-01-01'\",\n", + " \"required\": True,\n", + " },\n", + " \"smoking_friendly\": {\n", + " \"param_type\": \"boolean\",\n", + " \"description\": \"Whether the hotel is smoking friendly. E.g., True\",\n", + " \"required\": False,\n", + " },\n", + " }\n", + " },\n", + " ]\n", + ")\n", + "\n", + "print(response)\n", + "\n", + "for tool_call in response.completion_message.tool_calls:\n", + " print(tool_call)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='start', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='complete', logprobs=None, stop_reason='end_of_message'))\n" + ] + } + ], + "source": [ + "# 10 thousand character tool call\n", + "# We hit the max output token limit for 3.1-8b-instruct\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Count the number of characters in the following text:\n", + " Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non. Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non. Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. \n", + " \"\"\"},\n", + " ],\n", + " stream=True,\n", + " tool_choice=\"required\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"count_characters\",\n", + " \"description\": \"Count the number of characters in a text\",\n", + " \"parameters\": {\n", + " \"text\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The text to count the characters of\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='start', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta=ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'text': 'Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.'}, call_id='call_676h', tool_name='count_characters'), parse_status='in_progress'), event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='complete', logprobs=None, stop_reason='end_of_message'))\n" + ] + } + ], + "source": [ + "# 10 thousand character tool call\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama-3-70B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Count the number of characters in the following text:\n", + " Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non. Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non.Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. In nec lorem metus. Nunc molestie mollis enim, vitae volutpat elit blandit non. Donec eu lorem eget quam accumsan iaculis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. per conubia nostra, per inceptos himenaeos. Aenean felis tortor, tincidunt eu purus at, lacinia mollis mi. Aliquam lacinia molestie augue ac vestibulum. Duis ante lacus, vulputate a sollicitudin in, consectetur fermentum augue. Maecenas eget risus a dolor mattis feugiat. Integer accumsan tempor elit vel imperdiet. Donec et dignissim velit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Mauris dictum, nibh id varius accumsan, massa risus aliquam diam, at pulvinar risus risus sit amet erat. \n", + " \"\"\"},\n", + " ],\n", + " stream=True,\n", + " tool_choice=\"required\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"count_characters\",\n", + " \"description\": \"Count the number of characters in a text\",\n", + " \"parameters\": {\n", + " \"text\": {\n", + " \"param_type\": \"string\",\n", + " \"description\": \"The text to count the characters of\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='start', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta=ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'message': 10.0}, call_id='call_gjk5', tool_name='calculate_log'), parse_status='in_progress'), event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta=ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'a': 3.52, 'b': 4.89}, call_id='call_qw70', tool_name='add'), parse_status='in_progress'), event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta=ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={'a': 3.52, 'b': 4.89}, call_id='call_f64g', tool_name='multiply'), parse_status='in_progress'), event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='complete', logprobs=None, stop_reason='end_of_message'))\n" + ] + } + ], + "source": [ + "# Test multiple tool calls streaming\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " What is the logarithm of 10? What is 3.52 + 4.89? What is 3.52 * 4.89?\n", + " \"\"\"},\n", + " ],\n", + " stream=True,\n", + " tool_choice=\"required\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"multiply\",\n", + " \"description\": \"Multiply two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to multiply. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to multiply. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"add\",\n", + " \"description\": \"Add two numbers\",\n", + " \"parameters\": {\n", + " \"a\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The first number to add. E.g., 2.5\",\n", + " \"required\": True,\n", + " },\n", + " \"b\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The second number to add. E.g., 3.5\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"log\",\n", + " \"description\": \"Calculate the logarithm of a number\",\n", + " \"parameters\": {\n", + " \"message\": {\n", + " \"param_type\": \"float\",\n", + " \"description\": \"The number to calculate the logarithm of. E.g., 10, 200, 5.5 etc.\",\n", + " \"required\": True,\n", + " },\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='start', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta=ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(content=ToolCall(arguments={}, call_id='call_tgtr', tool_name='trigger_build'), parse_status='success'), event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='progress', logprobs=None, stop_reason=None))\n", + "ChatCompletionResponseStreamChunk(event=ChatCompletionResponseStreamChunkEvent(delta='', event_type='complete', logprobs=None, stop_reason='end_of_message'))\n" + ] + } + ], + "source": [ + "# No parameter tool call\n", + "response = client.inference.chat_completion(\n", + " model_id=\"Llama3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"\"\"\n", + " Trigger a build\n", + " \"\"\"},\n", + " ],\n", + " stream=True,\n", + " tool_choice=\"required\",\n", + " tools=[\n", + " {\n", + " \"tool_name\": \"trigger_build\",\n", + " \"description\": \"Trigger a build\",\n", + " \"parameters\": {\n", + " }\n", + " }\n", + " ]\n", + ")\n", + "\n", + "for chunk in response:\n", + " print(chunk)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/run.yaml b/run.yaml new file mode 100644 index 000000000..5695df886 --- /dev/null +++ b/run.yaml @@ -0,0 +1,80 @@ +version: '2' +image_name: groq +docker_image: null +conda_env: groq +apis: +- agents +- datasetio +- eval +- inference +- memory +- safety +- scoring +- telemetry +providers: + inference: + - provider_id: groq + provider_type: remote::groq + config: + api_key: ${env.GROQ_API_KEY} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/groq/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db +models: +- metadata: {} + model_id: Llama3.2-3B-Instruct + provider_id: groq + provider_model_id: null +- metadata: {} + model_id: Llama3.1-8B-Instruct + provider_id: groq + provider_model_id: null +- metadata: {} + model_id: Llama-3-70B-Instruct + provider_id: groq + provider_model_id: null +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: []