{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## LiteLLM Caching Tutorial\n",
        "Caching docs:\n",
        "https://docs.litellm.ai/docs/caching/"
      ],
      "metadata": {
        "id": "Lvj-GI3YQfQx"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eKSBuuKn99Jm"
      },
      "outputs": [],
      "source": [
        "!pip install litellm==0.1.492"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Set `caching_with_models` to True\n",
        "Enables caching on a per-model basis.\n",
        "The cache key is the input messages plus the model; the value stored in the cache is the corresponding response. (A cache-miss check with a different model is sketched at the end of this notebook.)"
      ],
      "metadata": {
        "id": "sFXj4UUnQpyt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import os, time, litellm\n",
        "from litellm import completion\n",
        "litellm.caching_with_models = True  # set caching for each model to True\n"
      ],
      "metadata": {
        "id": "xCea1EjR99rU"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Paste your OpenAI API key here before running the completion calls below\n",
        "os.environ['OPENAI_API_KEY'] = \"\""
      ],
      "metadata": {
        "id": "VK3kXGXI-dtC"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Use LiteLLM Cache\n",
        "Call `completion` twice with identical arguments: the first call hits the API, the second is served from the cache almost instantly."
      ],
      "metadata": {
        "id": "U_CDCcnjQ7c6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "question = \"write 1 page about what's LiteLLM\"\n",
        "for _ in range(2):\n",
        "    start_time = time.time()\n",
        "    response = completion(\n",
        "        model='gpt-3.5-turbo',\n",
        "        messages=[\n",
        "            {\n",
        "                'role': 'user',\n",
        "                'content': question\n",
        "            }\n",
        "        ],\n",
        "    )\n",
        "    print(f'Question: {question}')\n",
        "    print(\"Time taken: {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Efli-J-t-bJH",
        "outputId": "cfdb1e14-96b0-48ee-c504-7f567e84c349"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: write 1 page about what's LiteLLM\n",
            "Time taken: 13.53s\n",
            "Question: write 1 page about what's LiteLLM\n",
            "Time taken: 0.00s\n"
          ]
        }
      ]
    },
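    {
      "cell_type": "markdown",
      "source": [
        "## Sanity check: cache keys include the model\n",
        "A minimal sketch, assuming the per-model keying described above: re-asking the same question with a different model (here `gpt-4`, an illustrative choice) should miss the cache, so the call should take roughly as long as a fresh API request rather than ~0s."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Same question, different model -> different cache key, so this call\n",
        "# should be a cache MISS (assumption: keys are messages + model, per\n",
        "# the caching docs linked at the top of this notebook)\n",
        "start_time = time.time()\n",
        "response = completion(\n",
        "    model='gpt-4',\n",
        "    messages=[{'role': 'user', 'content': question}],\n",
        ")\n",
        "print(\"Time taken (cache miss expected): {:.2f}s\".format(time.time() - start_time))"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    }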
  ]
}