mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
181 lines
No EOL
4.7 KiB
Text
181 lines
No EOL
4.7 KiB
Text
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Using GPT Cache x LiteLLM\n",
        "- Cut costs 10x, improve speed 100x"
      ],
      "metadata": {
        "id": "kBwDrphDDEoO"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_K_4auSgCSjg"
      },
      "outputs": [],
      "source": [
        "!pip install litellm gptcache"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Usage\n",
        "* use `from litellm.cache import completion`\n",
        "* Init GPT Cache using the following lines:\n",
        "```python\n",
        "from gptcache import cache\n",
        "cache.init()\n",
        "cache.set_openai_key()\n",
        "```"
      ],
      "metadata": {
        "id": "DlZ22IfmDR5L"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## With OpenAI"
      ],
      "metadata": {
        "id": "js80pW9PC1KQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from gptcache import cache\n",
        "import os\n",
        "from litellm.cache import completion # import completion from litellm.cache\n",
        "import time\n",
        "\n",
        "# Set your .env keys\n",
        "os.environ['OPENAI_API_KEY'] = \"\"\n",
        "\n",
        "##### GPT Cache Init\n",
        "cache.init()\n",
        "cache.set_openai_key()\n",
        "#### End of GPT Cache Init\n",
        "\n",
        "question = \"what's LiteLLM\"\n",
        "for _ in range(2):\n",
        "    start_time = time.time()\n",
        "    response = completion(\n",
        "        model='gpt-3.5-turbo',\n",
        "        messages=[\n",
        "            {\n",
        "                'role': 'user',\n",
        "                'content': question\n",
        "            }\n",
        "        ],\n",
        "    )\n",
        "    print(f'Question: {question}')\n",
        "    print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "24a-mg1OCWe1",
        "outputId": "36130cb6-9bd6-4bc6-8405-b6e19a1e9357"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "start to install package: redis\n",
            "successfully installed package: redis\n",
            "start to install package: redis_om\n",
            "successfully installed package: redis_om\n",
            "Question: what's LiteLLM\n",
            "Time consuming: 1.18s\n",
            "Question: what's LiteLLM\n",
            "Time consuming: 0.00s\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## With Cohere"
      ],
      "metadata": {
        "id": "xXPtHamPCy73"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from gptcache import cache\n",
        "import os\n",
        "from litellm.cache import completion # import completion from litellm.cache\n",
        "import time\n",
        "\n",
        "# Set your .env keys\n",
        "os.environ['COHERE_API_KEY'] = \"\"\n",
        "\n",
        "##### GPT Cache Init\n",
        "cache.init()\n",
        "cache.set_openai_key()\n",
        "#### End of GPT Cache Init\n",
        "\n",
        "question = \"what's LiteLLM Github\"\n",
        "for _ in range(2):\n",
        "    start_time = time.time()\n",
        "    response = completion(\n",
        "        model='command-nightly',  # Cohere model via litellm; 'gpt-3.5-turbo' was inconsistent with COHERE_API_KEY\n",
        "        messages=[\n",
        "            {\n",
        "                'role': 'user',\n",
        "                'content': question\n",
        "            }\n",
        "        ],\n",
        "    )\n",
        "    print(f'Question: {question}')\n",
        "    print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "goRtiiAlChRW",
        "outputId": "47f473da-5560-4d6f-d9ef-525ff8e60758"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: what's LiteLLM Github\n",
            "Time consuming: 1.58s\n",
            "Question: what's LiteLLM Github\n",
            "Time consuming: 0.00s\n"
          ]
        }
      ]
    }
  ]
}