diff --git a/cookbook/LiteLLM_Caching.ipynb b/cookbook/LiteLLM_Caching.ipynb
new file mode 100644
index 000000000..1d025e4df
--- /dev/null
+++ b/cookbook/LiteLLM_Caching.ipynb
@@ -0,0 +1,143 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## LiteLLM Caching Tutorial\n",
+        "Docs on caching:\n",
+        "https://docs.litellm.ai/docs/caching/"
+      ],
+      "metadata": {
+        "id": "Lvj-GI3YQfQx"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eKSBuuKn99Jm"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install litellm==0.1.492"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Set `caching_with_models` to True\n",
+        "This enables caching on a per-model basis.\n",
+        "The cache key is the input messages plus the model name; the cached value is the corresponding response."
+      ],
+      "metadata": {
+        "id": "sFXj4UUnQpyt"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os, time, litellm\n",
+        "from litellm import completion\n",
+        "litellm.caching_with_models = True  # enable caching on a per-model basis\n"
+      ],
+      "metadata": {
+        "id": "xCea1EjR99rU"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "os.environ['OPENAI_API_KEY'] = \"\"  # set your OpenAI API key here"
+      ],
+      "metadata": {
+        "id": "VK3kXGXI-dtC"
+      },
+      "execution_count": 9,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Use the LiteLLM Cache"
+      ],
+      "metadata": {
+        "id": "U_CDCcnjQ7c6"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "question = \"write 1 page about what LiteLLM is\"\n",
+        "\n",
+        "# Call completion() twice with the same input; the second call is served from the cache\n",
+        "for _ in range(2):\n",
+        "    start_time = time.time()\n",
+        "    response = completion(\n",
+        "        model='gpt-3.5-turbo',\n",
+        "        messages=[\n",
+        "            {\n",
+        "                'role': 'user',\n",
+        "                'content': question\n",
+        "            }\n",
+        "        ],\n",
+        "    )\n",
+        "    print(f'Question: {question}')\n",
+        "    print('Time taken: {:.2f}s'.format(time.time() - start_time))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Efli-J-t-bJH",
+        "outputId": "cfdb1e14-96b0-48ee-c504-7f567e84c349"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Question: write 1 page about what LiteLLM is\n",
+            "Time taken: 13.53s\n",
+            "Question: write 1 page about what LiteLLM is\n",
+            "Time taken: 0.00s\n"
+          ]
+        }
+      ]
+    },
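+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Turn Caching Off\n",
+        "A minimal sketch: this notebook enables caching through the `litellm.caching_with_models` flag, so resetting the flag to `False` should disable caching for subsequent `completion()` calls."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Reset the per-model caching flag; later completion() calls should bypass the cache\n",
+        "litellm.caching_with_models = False"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file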