mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
(cleanup) cookbook notebooks
This commit is contained in: parent a8d65741f5, commit b5751bd040
4 changed files with 0 additions and 718 deletions
259  cookbook/LiteLLM_Azure_OpenAI.ipynb  vendored
@@ -1,259 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Use Azure OpenAI with LiteLLM"
],
"metadata": {
"id": "oTA-1bG_wBVw"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RreFKTyKv2nt"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"Pass API_BASE, API_VERSION, API_KEY in COMPLETION()"
],
"metadata": {
"id": "kSOo9lbKv_7H"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"response = litellm.completion(\n",
" model = \"azure/chatgpt-v-2\", # model = azure/<your deployment name>\n",
" api_base = \"https://openai-gpt-4-test-v-1.openai.azure.com/\", # azure api base\n",
" api_version = \"2023-05-15\", # azure api version\n",
" api_key = \"\", # azure api key\n",
" messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
" max_tokens=10,\n",
")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gWIsjrHMv_DM",
"outputId": "732e9daa-8dca-4bc1-bb8a-aee90ee14c8d"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-877x4J2JUSReOuxVGE3THLjcmdrI8\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1696709554,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"length\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Good morning! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 10,\n",
" \"prompt_tokens\": 10,\n",
" \"total_tokens\": 20\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "PR5uhvVHxe-C"
}
},
{
"cell_type": "markdown",
"source": [
"## Set .env variables with Azure / LiteLLM"
],
"metadata": {
"id": "1P2hprlLxfDc"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"\n",
"os.environ['AZURE_API_KEY'] = \"\"\n",
"os.environ['AZURE_API_BASE'] = \"\"\n",
"os.environ['AZURE_API_VERSION'] = \"\"\n",
"\n",
"response = litellm.completion(\n",
" model = \"azure/chatgpt-v-2\", # model = azure/<your deployment name>\n",
" messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
" max_tokens=10,\n",
")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mGi9Gae1wMjK",
"outputId": "29f2a9cf-f6ee-416b-9b24-02588d96fe59"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-877zB0GWZl4zswopLt12yQEzEfYWy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1696709685,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"length\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Good morning! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 10,\n",
" \"prompt_tokens\": 10,\n",
" \"total_tokens\": 20\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## With Streaming"
],
"metadata": {
"id": "uIhyvSVNx4hX"
}
},
{
"cell_type": "code",
"source": [
"response = litellm.completion(\n",
" model = \"azure/chatgpt-v-2\",\n",
" messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
" max_tokens=10,\n",
" stream=True\n",
")\n",
"\n",
"for chunk in response:\n",
" print(chunk)"
],
"metadata": {
"id": "R4KYKLOHxy9r"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## With Rate Limit Handler"
],
"metadata": {
"id": "hB8jLz94ybTC"
}
},
{
"cell_type": "code",
"source": [
"from litellm import RateLimitManager\n",
"\n",
"handler = RateLimitManager(max_requests_per_minute=10, max_tokens_per_minute=200)\n",
"\n",
"response = await handler.acompletion(\n",
" model = \"azure/chatgpt-v-2\",\n",
" messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
" max_tokens=10,\n",
")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CQECDwpix7Hl",
"outputId": "18dc4041-3262-4ab7-a451-34ceaf70ca31"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-8781gvDKwPbp44CliumABgAuIDnSf\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1696709840,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"length\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Good morning! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 10,\n",
" \"prompt_tokens\": 10,\n",
" \"total_tokens\": 20\n",
" }\n",
"}\n"
]
}
]
}
]
}
123  cookbook/LiteLLM_Caching.ipynb  vendored
@@ -1,123 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## LiteLLM Caching Tutorial\n",
"Link to using Caching in Docs:\n",
"https://docs.litellm.ai/docs/caching/"
],
"metadata": {
"id": "Lvj-GI3YQfQx"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eKSBuuKn99Jm"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.492"
]
},
{
"cell_type": "markdown",
"source": [
"## Set `caching_with_models` to True\n",
"Enables caching on a per-model basis.\n",
"Keys are the input messages + model and values stored in the cache is the corresponding response"
],
"metadata": {
"id": "sFXj4UUnQpyt"
}
},
{
"cell_type": "code",
"source": [
"import os, time, litellm\n",
"from litellm import completion\n",
"litellm.caching_with_models = True # set caching for each model to True\n"
],
"metadata": {
"id": "xCea1EjR99rU"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\""
],
"metadata": {
"id": "VK3kXGXI-dtC"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Use LiteLLM Cache"
],
"metadata": {
"id": "U_CDCcnjQ7c6"
}
},
{
"cell_type": "code",
"source": [
"question = \"write 1 page about what's LiteLLM\"\n",
"for _ in range(2):\n",
" start_time = time.time()\n",
" response = completion(\n",
" model='gpt-3.5-turbo',\n",
" messages=[\n",
" {\n",
" 'role': 'user',\n",
" 'content': question\n",
" }\n",
" ],\n",
" )\n",
" print(f'Question: {question}')\n",
" print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Efli-J-t-bJH",
"outputId": "cfdb1e14-96b0-48ee-c504-7f567e84c349"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Question: write 1 page about what's LiteLLM\n",
"Time consuming: 13.53s\n",
"Question: write 1 page about what's LiteLLM\n",
"Time consuming: 0.00s\n"
]
}
]
}
]
}
336  cookbook/LiteLLM_GPTCache.ipynb  vendored
@@ -1,336 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Using GPT Cache with LiteLLM\n",
"GPT Cache allows you to slash Your LLM API Costs by 10x 💰, Boost Speed by 100x ⚡\n",
"\n",
"In this tutorial we demo how to use LiteLLM with GPTCache\n",
"* Quick Start Usage\n",
"* Advanced Usaged\n",
"* Setting custom cache keys\n",
"\n"
],
"metadata": {
"id": "2BUxu9L2mPbX"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SRbVXJUGk6HC"
},
"outputs": [],
"source": [
"# installation\n",
"!pip install litellm gptcache"
]
},
{
"cell_type": "markdown",
"source": [
"# Set ENV variables\n"
],
"metadata": {
"id": "UuZX3OSBlIDt"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"os.environ['COHERE_API_KEY'] = \"\""
],
"metadata": {
"id": "E4jn-bPWlBZs"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Quick Start Usage\n",
"By default GPT Cache uses the content in `messages` as the cache key\n",
" Import GPT Cache"
],
"metadata": {
"id": "Tswo-058lcid"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"from litellm.gpt_cache import completion\n",
"\n",
"### using / setting up gpt cache\n",
"from gptcache import cache\n",
"cache.init()\n",
"cache.set_openai_key()\n",
"#########################"
],
"metadata": {
"id": "9oOV8gRtk_la"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"## two completion calls\n",
"import time\n",
"question = \"why should i use LiteLLM\"\n",
"for _ in range(2):\n",
" start_time = time.time()\n",
" response = completion(\n",
" model='gpt-3.5-turbo',\n",
" messages=[\n",
" {\n",
" 'role': 'user',\n",
" 'content': question\n",
" }\n",
" ],\n",
" )\n",
" print(f'Question: {question}, Response {response}')\n",
" print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Em1kgIOOm1Vo",
"outputId": "d8e57747-a851-4675-f936-d65e5570d95a"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Question: why should i use LiteLLM, Response {\n",
" \"id\": \"chatcmpl-7tJozrtW5UzVHNUcxX6cfzRS4nbxd\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1693418589,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"There are several reasons why you might consider using LiteLLM:\\n\\n1. Simplified document management: LiteLLM offers a user-friendly interface that makes it easy to manage and organize your legal documents. You can track versions, organize files into folders, and quickly find what you need.\\n\\n2. Collaboration and accessibility: LiteLLM allows multiple users to work on documents simultaneously, making it easier for teams to collaborate and exchange feedback. It also provides flexible accessibility, allowing you to access your documents from anywhere, anytime, as long as you have an internet connection.\\n\\n3. Time-saving features: The platform offers various time-saving features, such as automated document generation, customizable templates, and integration with other tools like Microsoft Word. This can significantly reduce the time and effort required to prepare legal documents.\\n\\n4. Enhanced security: LiteLLM prioritizes the security of your data. It provides robust encryption, secure data storage, and role-based access controls. This ensures that your sensitive legal documents are protected from unauthorized access.\\n\\n5. Cost-effective solution: LiteLLM offers a cost-effective solution compared to traditional legal document management systems. With its cloud-based approach, you don't need to invest in expensive hardware or software installations. Instead, you pay for a subscription-based model, which can be more affordable for small firms or individual practitioners.\\n\\nUltimately, the decision to use LiteLLM depends on your specific needs and preferences. It's important to consider factors such as the size of your practice, the volume of legal documents you handle, and your budget before making a decision.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 14,\n",
" \"completion_tokens\": 312,\n",
" \"total_tokens\": 326\n",
" }\n",
"}\n",
"Time consuming: 9.48s\n",
"Question: why should i use LiteLLM, Response {'gptcache': True, 'saved_token': [14, 312], 'choices': [{'message': {'role': 'assistant', 'content': \"There are several reasons why you might consider using LiteLLM:\\n\\n1. Simplified document management: LiteLLM offers a user-friendly interface that makes it easy to manage and organize your legal documents. You can track versions, organize files into folders, and quickly find what you need.\\n\\n2. Collaboration and accessibility: LiteLLM allows multiple users to work on documents simultaneously, making it easier for teams to collaborate and exchange feedback. It also provides flexible accessibility, allowing you to access your documents from anywhere, anytime, as long as you have an internet connection.\\n\\n3. Time-saving features: The platform offers various time-saving features, such as automated document generation, customizable templates, and integration with other tools like Microsoft Word. This can significantly reduce the time and effort required to prepare legal documents.\\n\\n4. Enhanced security: LiteLLM prioritizes the security of your data. It provides robust encryption, secure data storage, and role-based access controls. This ensures that your sensitive legal documents are protected from unauthorized access.\\n\\n5. Cost-effective solution: LiteLLM offers a cost-effective solution compared to traditional legal document management systems. With its cloud-based approach, you don't need to invest in expensive hardware or software installations. Instead, you pay for a subscription-based model, which can be more affordable for small firms or individual practitioners.\\n\\nUltimately, the decision to use LiteLLM depends on your specific needs and preferences. It's important to consider factors such as the size of your practice, the volume of legal documents you handle, and your budget before making a decision.\"}, 'finish_reason': 'stop', 'index': 0}], 'created': 1693418598, 'usage': {'completion_tokens': 0, 'prompt_tokens': 0, 'total_tokens': 0}, 'object': 'chat.completion'}\n",
"Time consuming: 0.00s\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Advanced Usage - Setting custom keys for Cache\n",
"By default gptcache uses the `messages` as the cache key\n",
"\n",
"GPTCache allows you to set custom cache keys by setting\n",
"```python\n",
"cache.init(pre_func=pre_cache_func)\n",
"```\n",
"\n",
"In this code snippet below we define a `pre_func` that returns message content + model as key"
],
"metadata": {
"id": "R6hywKu8nXXW"
}
},
{
"cell_type": "markdown",
"source": [
"## Defining a `pre_func` for GPTCache\n"
],
"metadata": {
"id": "6nx1X-2Hn3ak"
}
},
{
"cell_type": "code",
"source": [
"### using / setting up gpt cache\n",
"from gptcache import cache\n",
"from gptcache.processor.pre import last_content_without_prompt\n",
"from typing import Dict, Any\n",
"\n",
"# use this function to set your cache keys -> gptcache\n",
"# data are all the args passed to your completion call\n",
"def pre_cache_func(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:\n",
" # use this to set cache key\n",
" print(\"in pre_cache_func\")\n",
" last_content_without_prompt_val = last_content_without_prompt(data, **params)\n",
" print(\"last content without prompt\", last_content_without_prompt_val)\n",
" print(\"model\", data[\"model\"])\n",
" cache_key = last_content_without_prompt_val + data[\"model\"]\n",
" print(\"cache_key\", cache_key)\n",
" return cache_key # using this as cache_key\n",
""
],
"metadata": {
"id": "jJQsTyrZlvDY"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Init Cache with `pre_func` to set custom keys"
],
"metadata": {
"id": "Tjv1e0hqn-dX"
}
},
{
"cell_type": "code",
"source": [
"# init GPT Cache with custom pre_func\n",
"cache.init(pre_func=pre_cache_func)\n",
"cache.set_openai_key()"
],
"metadata": {
"id": "Ua8UhEp6n9yR"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Using Cache\n",
"* Cache key is `message` + `model`\n",
"\n",
"We make 3 LLM API calls\n",
"* 2 to OpenAI\n",
"* 1 to Cohere command nightly"
],
"metadata": {
"id": "jHqWdfC4sTHf"
}
},
{
"cell_type": "code",
"source": [
"messages = [{\"role\": \"user\", \"content\": \"why should I use LiteLLM for completions()\"}]\n",
"response1 = completion(model=\"gpt-3.5-turbo\", messages=messages)\n",
"response2 = completion(model=\"gpt-3.5-turbo\", messages=messages)\n",
"response3 = completion(model=\"command-nightly\", messages=messages) # calling cohere command nightly\n",
"\n",
"if response1[\"choices\"] != response2[\"choices\"]: # same models should cache\n",
" print(f\"Error occurred: Caching for same model+prompt failed\")\n",
"\n",
"if response3[\"choices\"] == response2[\"choices\"]: # different models, don't cache\n",
" # if models are different, it should not return cached response\n",
" print(f\"Error occurred: Caching for different model+prompt failed\")\n",
"\n",
"print(\"response1\", response1)\n",
"print(\"response2\", response2)\n",
"print(\"response3\", response3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ftrKpB2GsPMi",
"outputId": "1ee49273-bd62-49b4-a177-d40e33a51785"
},
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"in pre_cache_func\n",
"last content without prompt why should I use LiteLLM for completions()\n",
"model gpt-3.5-turbo\n",
"cache_key why should I use LiteLLM for completions()gpt-3.5-turbo\n",
"in pre_cache_func\n",
"last content without prompt why should I use LiteLLM for completions()\n",
"model gpt-3.5-turbo\n",
"cache_key why should I use LiteLLM for completions()gpt-3.5-turbo\n",
"in pre_cache_func\n",
"last content without prompt why should I use LiteLLM for completions()\n",
"model command-nightly\n",
"cache_key why should I use LiteLLM for completions()command-nightly\n",
"response1 {\n",
" \"id\": \"chatcmpl-7tKE21PEe43sR6RvZ7pcUmanFwZLf\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1693420142,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"There are several reasons why you should use LiteLLM for completions() in your code:\\n\\n1. Fast and efficient: LiteLLM is implemented in a lightweight manner, making it highly performant. It provides quick and accurate completions, ensuring a smooth development experience.\\n\\n2. Customizable completion options: LiteLLM allows you to customize the completion options based on your specific needs. You can specify the maximum number of completions to retrieve, the desired timeout, and more.\\n\\n3. Language-agnostic: LiteLLM supports multiple programming languages, including Python, JavaScript, Java, C++, and many others. So, regardless of the language you are using, LiteLLM can help you with intelligent code completions.\\n\\n4. Learning capabilities: LiteLLM has the ability to learn from the provided code and context, improving the accuracy of the completions over time. This means that as you continue to use it, the suggested completions will become increasingly tailored to your specific coding style and patterns.\\n\\n5. Ease of integration: LiteLLM is designed to be easily integrated into existing code editors or IDEs. It provides a simple API that allows you to integrate it seamlessly into your development workflow.\\n\\n6. Supported by OpenAI: LiteLLM is developed and maintained by OpenAI, a well-known and reputable organization in the field of artificial intelligence. This ensures ongoing support and updates to enhance the functionality and performance of LiteLLM.\\n\\nOverall, using LiteLLM for completions() can greatly improve your coding productivity by providing accurate and context-aware code completion suggestions, regardless of the programming language you are working with.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 18,\n",
" \"completion_tokens\": 326,\n",
" \"total_tokens\": 344\n",
" }\n",
"}\n",
"response2 {'gptcache': True, 'saved_token': [18, 326], 'choices': [{'message': {'role': 'assistant', 'content': 'There are several reasons why you should use LiteLLM for completions() in your code:\\n\\n1. Fast and efficient: LiteLLM is implemented in a lightweight manner, making it highly performant. It provides quick and accurate completions, ensuring a smooth development experience.\\n\\n2. Customizable completion options: LiteLLM allows you to customize the completion options based on your specific needs. You can specify the maximum number of completions to retrieve, the desired timeout, and more.\\n\\n3. Language-agnostic: LiteLLM supports multiple programming languages, including Python, JavaScript, Java, C++, and many others. So, regardless of the language you are using, LiteLLM can help you with intelligent code completions.\\n\\n4. Learning capabilities: LiteLLM has the ability to learn from the provided code and context, improving the accuracy of the completions over time. This means that as you continue to use it, the suggested completions will become increasingly tailored to your specific coding style and patterns.\\n\\n5. Ease of integration: LiteLLM is designed to be easily integrated into existing code editors or IDEs. It provides a simple API that allows you to integrate it seamlessly into your development workflow.\\n\\n6. Supported by OpenAI: LiteLLM is developed and maintained by OpenAI, a well-known and reputable organization in the field of artificial intelligence. This ensures ongoing support and updates to enhance the functionality and performance of LiteLLM.\\n\\nOverall, using LiteLLM for completions() can greatly improve your coding productivity by providing accurate and context-aware code completion suggestions, regardless of the programming language you are working with.'}, 'finish_reason': 'stop', 'index': 0}], 'created': 1693420152, 'usage': {'completion_tokens': 0, 'prompt_tokens': 0, 'total_tokens': 0}, 'object': 'chat.completion'}\n",
"response3 {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" LiteLLM is a state-of-the-art, privacy-preserving LLM trained\",\n",
" \"role\": \"assistant\"\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1693420153.8769038,\n",
" \"model\": \"command-nightly\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 16,\n",
" \"total_tokens\": 27\n",
" }\n",
"}\n"
]
}
]
}
]
}