{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Use LiteLLM to calculate costs for all your completion calls\n",
        "In this notebook we'll use `litellm.completion_cost` to get completion costs."
      ],
      "metadata": {
        "id": "BgWr0PsUR3vV"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ViczFTjsDzSI"
      },
      "outputs": [],
      "source": [
        "# use 0.1.549 or later\n",
        "%pip install litellm==0.1.549"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Calculating costs for gpt-3.5-turbo completion()"
      ],
      "metadata": {
        "id": "k_1CWUwmSNtj"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "from litellm import completion, completion_cost\n",
        "\n",
        "# Keep a key that is already set in the environment; otherwise prompt for it\n",
        "# so the key is not stored in the notebook.\n",
        "if not os.environ.get('OPENAI_API_KEY'):\n",
        "    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')\n",
        "\n",
        "messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
        "response = completion(\n",
        "    model=\"gpt-3.5-turbo\",\n",
        "    messages=messages,\n",
        ")\n",
        "\n",
        "print(response)\n",
        "\n",
        "# completion_cost() is given the response object returned by completion();\n",
        "# the result is printed as a dollar amount with 10 decimal places.\n",
        "cost = completion_cost(completion_response=response)\n",
        "formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
        "print(formatted_string)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Tp0fyk-jD0pP",
        "outputId": "ce885fb3-3237-41b2-9d8b-3fb30bba498b"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "got response\n",
            "{\n",
            " \"id\": \"chatcmpl-7vyCApIZaCxP36kb9meUMN2DFSJPh\",\n",
            " \"object\": \"chat.completion\",\n",
            " \"created\": 1694050442,\n",
            " \"model\": \"gpt-3.5-turbo-0613\",\n",
            " \"choices\": [\n",
            " {\n",
            " \"index\": 0,\n",
            " \"message\": {\n",
            " \"role\": \"assistant\",\n",
            " \"content\": \"Hello! I'm an AI and I don't have feelings, but I'm here to help you. 
How can I assist you today?\"\n",
            " },\n",
            " \"finish_reason\": \"stop\"\n",
            " }\n",
            " ],\n",
            " \"usage\": {\n",
            " \"prompt_tokens\": 13,\n",
            " \"completion_tokens\": 28,\n",
            " \"total_tokens\": 41\n",
            " }\n",
            "}\n",
            "Cost for completion call: $0.0000755000\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Calculating costs for Together Computer completion()"
      ],
      "metadata": {
        "id": "AjDs4G-uS6PS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "from litellm import completion, completion_cost\n",
        "\n",
        "# Keep a key that is already set in the environment; otherwise prompt for it\n",
        "# so the key is not stored in the notebook.\n",
        "if not os.environ.get('TOGETHERAI_API_KEY'):\n",
        "    os.environ['TOGETHERAI_API_KEY'] = getpass('Together AI API key: ')\n",
        "\n",
        "messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
        "response = completion(\n",
        "    model=\"togethercomputer/llama-2-70b-chat\",\n",
        "    messages=messages,\n",
        ")\n",
        "\n",
        "print(response)\n",
        "\n",
        "# completion_cost() is given the response object returned by completion();\n",
        "# the result is printed as a dollar amount with 10 decimal places.\n",
        "cost = completion_cost(completion_response=response)\n",
        "formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
        "print(formatted_string)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jMPsUV-KEa6a",
        "outputId": "7a69b291-f149-4b9c-8a78-9c8142bac759"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            " \"choices\": [\n",
            " {\n",
            " \"finish_reason\": \"stop\",\n",
            " \"index\": 0,\n",
            " \"message\": {\n",
            " \"content\": \"Hello! I'm doing well, thanks for asking. 
I hope you're having a great\",\n",
            " \"role\": \"assistant\",\n",
            " \"logprobs\": null\n",
            " }\n",
            " }\n",
            " ],\n",
            " \"created\": 1694050771.2821715,\n",
            " \"model\": \"togethercomputer/llama-2-70b-chat\",\n",
            " \"usage\": {\n",
            " \"prompt_tokens\": 12,\n",
            " \"completion_tokens\": 18,\n",
            " \"total_tokens\": 30\n",
            " }\n",
            "}\n",
            "Cost for completion call: $0.0000900000\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Calculating costs for Replicate Llama2 completion()"
      ],
      "metadata": {
        "id": "vEa4s6-7TANS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "from litellm import completion, completion_cost\n",
        "\n",
        "# Keep a key that is already set in the environment; otherwise prompt for it\n",
        "# so the key is not stored in the notebook.\n",
        "if not os.environ.get('REPLICATE_API_KEY'):\n",
        "    os.environ['REPLICATE_API_KEY'] = getpass('Replicate API key: ')\n",
        "\n",
        "messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
        "response = completion(\n",
        "    model=\"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n",
        "    messages=messages,\n",
        ")\n",
        "\n",
        "print(response)\n",
        "\n",
        "# completion_cost() is given the response object returned by completion();\n",
        "# the result is printed as a dollar amount with 10 decimal places.\n",
        "cost = completion_cost(completion_response=response)\n",
        "formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
        "print(formatted_string)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Xf1TKRDuS1bR",
        "outputId": "cfb2b484-a6e5-41ad-86c5-7e66aba27648"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            " \"choices\": [\n",
            " {\n",
            " \"finish_reason\": \"stop\",\n",
            " \"index\": 0,\n",
            " \"message\": {\n",
            " \"content\": \" Hello! I'm doing well, thanks for asking. How about you? 
Is there anything you need help with today?\",\n", " \"role\": \"assistant\",\n", " \"logprobs\": null\n", " }\n", " }\n", " ],\n", " \"created\": 1694050893.4534576,\n", " \"model\": \"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n", " \"usage\": {\n", " \"prompt_tokens\": 6,\n", " \"completion_tokens\": 24,\n", " \"total_tokens\": 30\n", " },\n", " \"ended\": 1694050896.6689413\n", "}\n", "total_replicate_run_time 3.2154836654663086\n", "Cost for completion call: $0.0045016771\n" ] } ] } ] }