diff --git a/cookbook/LiteLLM_GPTCache.ipynb b/cookbook/LiteLLM_GPTCache.ipynb
new file mode 100644
index 000000000..6829ed0de
--- /dev/null
+++ b/cookbook/LiteLLM_GPTCache.ipynb
@@ -0,0 +1,215 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Using GPT Cache x LiteLLM\n",
+ "- Caching repeated responses can cut costs ~10x and improve speed ~100x"
+ ],
+ "metadata": {
+ "id": "kBwDrphDDEoO"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "_K_4auSgCSjg"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install litellm gptcache"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Usage\n",
+ "* Use `from litellm.cache import completion`\n",
+ "* Initialize GPT Cache with the following lines:\n",
+ "```python\n",
+ "from gptcache import cache\n",
+ "cache.init()\n",
+ "cache.set_openai_key()\n",
+ "```"
+ ],
+ "metadata": {
+ "id": "DlZ22IfmDR5L"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## With OpenAI"
+ ],
+ "metadata": {
+ "id": "js80pW9PC1KQ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from gptcache import cache\n",
+ "import os\n",
+ "from litellm.cache import completion # import completion from litellm.cache\n",
+ "import time\n",
+ "\n",
+ "# Set your OpenAI API key\n",
+ "os.environ['OPENAI_API_KEY'] = \"\"\n",
+ "\n",
+ "##### GPT Cache Init\n",
+ "cache.init()\n",
+ "cache.set_openai_key()\n",
+ "#### End of GPT Cache Init\n",
+ "\n",
+ "question = \"what's LiteLLM\"\n",
+ "# The first call hits the API; the repeated call should be served from the cache\n",
+ "for _ in range(2):\n",
+ "    start_time = time.time()\n",
+ "    response = completion(\n",
+ "        model='gpt-3.5-turbo',\n",
+ "        messages=[\n",
+ "            {\n",
+ "                'role': 'user',\n",
+ "                'content': question\n",
+ "            }\n",
+ "        ],\n",
+ "    )\n",
+ "    print(f'Question: {question}')\n",
+ "    print(\"Time taken: {:.2f}s\".format(time.time() - start_time))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "24a-mg1OCWe1",
+ "outputId": "36130cb6-9bd6-4bc6-8405-b6e19a1e9357"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "start to install package: redis\n",
+ "successfully installed package: redis\n",
+ "start to install package: redis_om\n",
+ "successfully installed package: redis_om\n",
+ "Question: what's LiteLLM\n",
+ "Time taken: 1.18s\n",
+ "Question: what's LiteLLM\n",
+ "Time taken: 0.00s\n"
+ ]
+ }
+ ]
+ },
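+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Verify the cache hit\n",
+ "A minimal sketch to confirm caching is working: repeat the same prompt and check that the second call returns the identical response almost instantly. This assumes the cache was initialized as above, `OPENAI_API_KEY` is set, and responses follow the OpenAI response format."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "from litellm.cache import completion\n",
+ "\n",
+ "messages = [{'role': 'user', 'content': \"what's LiteLLM\"}]\n",
+ "\n",
+ "# First call goes to the API and populates the cache\n",
+ "start = time.time()\n",
+ "first = completion(model='gpt-3.5-turbo', messages=messages)\n",
+ "print(f'first call: {time.time() - start:.2f}s')\n",
+ "\n",
+ "# Identical prompt, so GPT Cache should answer without another API call\n",
+ "start = time.time()\n",
+ "second = completion(model='gpt-3.5-turbo', messages=messages)\n",
+ "print(f'second call: {time.time() - start:.2f}s')\n",
+ "\n",
+ "# A cache hit returns the exact same response content\n",
+ "print(first['choices'][0]['message']['content'] == second['choices'][0]['message']['content'])"
+ ]
+ },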
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## With Cohere"
+ ],
+ "metadata": {
+ "id": "xXPtHamPCy73"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from gptcache import cache\n",
+ "import os\n",
+ "from litellm.cache import completion # import completion from litellm.cache\n",
+ "import time\n",
+ "\n",
+ "# Set your Cohere API key\n",
+ "os.environ['COHERE_API_KEY'] = \"\"\n",
+ "\n",
+ "##### GPT Cache Init\n",
+ "cache.init()\n",
+ "cache.set_openai_key()\n",
+ "#### End of GPT Cache Init\n",
+ "\n",
+ "question = \"what's LiteLLM Github\"\n",
+ "for _ in range(2):\n",
+ "    start_time = time.time()\n",
+ "    response = completion(\n",
+ "        model='command-nightly', # use a Cohere model, since COHERE_API_KEY is set\n",
+ "        messages=[\n",
+ "            {\n",
+ "                'role': 'user',\n",
+ "                'content': question\n",
+ "            }\n",
+ "        ],\n",
+ "    )\n",
+ "    print(f'Question: {question}')\n",
+ "    print(\"Time taken: {:.2f}s\".format(time.time() - start_time))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "goRtiiAlChRW",
+ "outputId": "47f473da-5560-4d6f-d9ef-525ff8e60758"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Question: what's LiteLLM Github\n",
+ "Time taken: 1.58s\n",
+ "Question: what's LiteLLM Github\n",
+ "Time taken: 0.00s\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file