{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## LiteLLM Caching Tutorial\n",
        "Caching docs:\n",
        "https://docs.litellm.ai/docs/caching/"
      ],
      "metadata": {
        "id": "Lvj-GI3YQfQx"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eKSBuuKn99Jm"
      },
      "outputs": [],
      "source": [
        "!pip install litellm==0.1.492"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Set `caching_with_models` to True\n",
        "Enables caching on a per-model basis.\n",
        "The cache key is the input messages plus the model; the value stored in the cache is the corresponding response. (A cache-miss check with a different model is sketched at the end of this notebook.)"
      ],
      "metadata": {
        "id": "sFXj4UUnQpyt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os, time, litellm\n",
        "from litellm import completion\n",
        "litellm.caching_with_models = True  # set caching for each model to True\n"
      ],
      "metadata": {
        "id": "xCea1EjR99rU"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Paste your OpenAI API key here before running the completion calls below\n",
        "os.environ['OPENAI_API_KEY'] = \"\""
      ],
      "metadata": {
        "id": "VK3kXGXI-dtC"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Use LiteLLM Cache\n",
        "Call `completion` twice with identical arguments: the first call hits the API, the second is served from the cache almost instantly."
      ],
      "metadata": {
        "id": "U_CDCcnjQ7c6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "question = \"write 1 page about what's LiteLLM\"\n",
        "for _ in range(2):\n",
        "    start_time = time.time()\n",
        "    response = completion(\n",
        "        model='gpt-3.5-turbo',\n",
        "        messages=[\n",
        "            {\n",
        "                'role': 'user',\n",
        "                'content': question\n",
        "            }\n",
        "        ],\n",
        "    )\n",
        "    print(f'Question: {question}')\n",
        "    print(\"Time taken: {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Efli-J-t-bJH",
        "outputId": "cfdb1e14-96b0-48ee-c504-7f567e84c349"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: write 1 page about what's LiteLLM\n",
            "Time taken: 13.53s\n",
            "Question: write 1 page about what's LiteLLM\n",
            "Time taken: 0.00s\n"
          ]
        }
      ]
    },
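    {
      "cell_type": "markdown",
      "source": [
        "## Sanity check: cache keys include the model\n",
        "A minimal sketch, assuming the per-model keying described above: re-asking the same question with a different model (here `gpt-4`, an illustrative choice) should miss the cache, so the call should take roughly as long as a fresh API request rather than ~0s."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Same question, different model -> different cache key, so this call\n",
        "# should be a cache MISS (assumption: keys are messages + model, per\n",
        "# the caching docs linked at the top of this notebook)\n",
        "start_time = time.time()\n",
        "response = completion(\n",
        "    model='gpt-4',\n",
        "    messages=[{'role': 'user', 'content': question}],\n",
        ")\n",
        "print(\"Time taken (cache miss expected): {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    }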
  ]
}