diff --git a/cookbook/LiteLLM_Caching.ipynb b/cookbook/LiteLLM_Caching.ipynb
new file mode 100644
index 000000000..1d025e4df
--- /dev/null
+++ b/cookbook/LiteLLM_Caching.ipynb
@@ -0,0 +1,143 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## LiteLLM Caching Tutorial\n",
+        "Docs on caching:\n",
+        "https://docs.litellm.ai/docs/caching/"
+      ],
+      "metadata": {
+        "id": "Lvj-GI3YQfQx"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eKSBuuKn99Jm"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install litellm==0.1.492"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Set `caching_with_models` to True\n",
+        "This enables caching on a per-model basis.\n",
+        "The cache key is the input messages plus the model name; the cached value is the corresponding response."
+      ],
+      "metadata": {
+        "id": "sFXj4UUnQpyt"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os, time, litellm\n",
+        "from litellm import completion\n",
+        "litellm.caching_with_models = True  # enable caching on a per-model basis\n"
+      ],
+      "metadata": {
+        "id": "xCea1EjR99rU"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "os.environ['OPENAI_API_KEY'] = \"\"  # set your OpenAI API key here"
+      ],
+      "metadata": {
+        "id": "VK3kXGXI-dtC"
+      },
+      "execution_count": 9,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Use the LiteLLM Cache"
+      ],
+      "metadata": {
+        "id": "U_CDCcnjQ7c6"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "question = \"write 1 page about what LiteLLM is\"\n",
+        "\n",
+        "# Call completion() twice with the same input; the second call is served from the cache\n",
+        "for _ in range(2):\n",
+        "    start_time = time.time()\n",
+        "    response = completion(\n",
+        "        model='gpt-3.5-turbo',\n",
+        "        messages=[\n",
+        "            {\n",
+        "                'role': 'user',\n",
+        "                'content': question\n",
+        "            }\n",
+        "        ],\n",
+        "    )\n",
+        "    print(f'Question: {question}')\n",
+        "    print('Time taken: {:.2f}s'.format(time.time() - start_time))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Efli-J-t-bJH",
+        "outputId": "cfdb1e14-96b0-48ee-c504-7f567e84c349"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Question: write 1 page about what LiteLLM is\n",
+            "Time taken: 13.53s\n",
+            "Question: write 1 page about what LiteLLM is\n",
+            "Time taken: 0.00s\n"
+          ]
+        }
+      ]
+    },
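+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Turn Caching Off\n",
+        "A minimal sketch: this notebook enables caching through the `litellm.caching_with_models` flag, so resetting the flag to `False` should disable caching for subsequent `completion()` calls."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Reset the per-model caching flag; later completion() calls should bypass the cache\n",
+        "litellm.caching_with_models = False"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file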