(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
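
For context, the rule being enforced here is Ruff's F401 (unused imports). Below is a minimal sketch of the configuration and the cleanup commands, assuming Ruff is configured through pyproject.toml; the repo's actual settings may differ.

    # pyproject.toml (sketch; actual repo config may differ)
    [tool.ruff.lint]
    extend-select = ["F401"]   # F401 = module imported but unused

    # one-time auto-fix, then a plain check for CI
    ruff check --select F401 --fix .
    ruff check .

Ruff removes most flagged imports automatically with --fix; the "fix ..." bullets above are presumably the manual follow-up for spots the auto-fix missed or broke.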
Ishaan Jaff, 2024-12-19 12:33:42 -08:00 (committed by GitHub)
parent 5e344497ce
commit c7f14e936a
347 changed files with 5473 additions and 7207 deletions

View file

@@ -1,6 +1,4 @@
-from locust import HttpUser, task, between, events
+from locust import HttpUser, task, between
-import json
-import time
 class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
             # Include any additional headers you may need for authentication, etc.
         }

File diff suppressed because one or more lines are too long

View file

@@ -36,9 +36,7 @@
 },
 "outputs": [],
 "source": [
-    "import litellm\n",
-    "from litellm import load_test_model, testing_batch_completion\n",
-    "import time"
+    "from litellm import load_test_model, testing_batch_completion"
 ]
 },
 {

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "BmX0b5Ueh91v"
},
"source": [ "source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n", "# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n", "This notebook covers the following for Azure OpenAI + OpenAI:\n",
@ -24,10 +13,7 @@
"* Completion - Azure, OpenAI in separate threads\n", "* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n", "* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread" "* Completion - Azure, OpenAI in the same thread"
], ]
"metadata": {
"id": "BmX0b5Ueh91v"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -42,57 +28,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os, litellm"
],
"metadata": { "metadata": {
"id": "mnveHO5dfcB0" "id": "mnveHO5dfcB0"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": { "metadata": {
"id": "eo88QUdbiDIE" "id": "eo88QUdbiDIE"
} },
"source": [
"## Completion - Quick start"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 12,
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -100,11 +56,10 @@
"id": "5OSosWNCfc_2", "id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348" "outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
}, },
"execution_count": 12,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"Openai Response\n", "Openai Response\n",
"\n", "\n",
@ -154,19 +109,54 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": { "metadata": {
"id": "dQMkM-diiKdE" "id": "dQMkM-diiKdE"
} },
"source": [
"## Completion - Streaming"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVvJDVn4g1i1"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"from litellm import completion\n", "from litellm import completion\n",
@ -199,24 +189,24 @@
"print(\"Azure Streaming response\")\n", "print(\"Azure Streaming response\")\n",
"for chunk in response:\n", "for chunk in response:\n",
" print(chunk)\n" " print(chunk)\n"
], ]
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": { "metadata": {
"id": "4xrOPnt-oqwm" "id": "4xrOPnt-oqwm"
} },
"source": [
"## Completion - Azure, OpenAI in separate threads"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5b5taJPjvC3"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import threading\n", "import threading\n",
@ -255,25 +245,25 @@
"thread2.join()\n", "thread2.join()\n",
"\n", "\n",
"print(\"Both completions are done.\")" "print(\"Both completions are done.\")"
], ]
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "lx8DbMBqoAoN"
},
"source": [ "source": [
"## Completion - Stress Test 10 requests in parallel\n", "## Completion - Stress Test 10 requests in parallel\n",
"\n" "\n"
], ]
"metadata": {
"id": "lx8DbMBqoAoN"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pHYANOlOkoDh"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import threading\n", "import threading\n",
@ -309,57 +299,20 @@
" thread.join()\n", " thread.join()\n",
"\n", "\n",
"print(\"All completions are done.\")\n" "print(\"All completions are done.\")\n"
], ]
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": { "metadata": {
"id": "yB2NDOO4oxrp" "id": "yB2NDOO4oxrp"
} },
"source": [
"## Completion - Azure, OpenAI in the same thread"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 23,
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -367,11 +320,10 @@
"id": "HTBqwzxpnxab", "id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14" "outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
}, },
"execution_count": 23,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"OpenAI Response: {\n", "OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n", " \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
@ -417,7 +369,54 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

File diff suppressed because one or more lines are too long

View file

@ -1,30 +1,16 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "MbLbs1tbISk-"
},
"source": [ "source": [
"# LiteLLM Batch Completions Example\n", "# LiteLLM Batch Completions Example\n",
"\n", "\n",
"* This tutorial walks through using `batch_completion`\n", "* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching" "* Docs: https://docs.litellm.ai/docs/completion/batching"
], ]
"metadata": {
"id": "MbLbs1tbISk-"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -39,69 +25,42 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": { "metadata": {
"id": "KGhNJRUCIh1j" "id": "KGhNJRUCIh1j"
} },
"source": [
"## Import Batch Completion"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "LOtI43snDrSK"
},
"outputs": [],
"source": [ "source": [
"import litellm\n",
"import os\n", "import os\n",
"from litellm import batch_completion\n", "from litellm import batch_completion\n",
"\n", "\n",
"# set your API_KEY\n", "# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"" "os.environ['ANTHROPIC_API_KEY'] = \"\""
], ]
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "Xhv92NBaIpaw"
},
"source": [ "source": [
"## Calling `litellm.batch_completion`\n", "## Calling `litellm.batch_completion`\n",
"\n", "\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call." "In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
], ]
"metadata": {
"id": "Xhv92NBaIpaw"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 11,
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -109,10 +68,8 @@
"id": "yY7GIRLsDywu", "id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb" "outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
}, },
"execution_count": 11,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n", "[<ModelResponse at 0x7a164eed4450> JSON: {\n",
@ -157,10 +114,50 @@
" }]" " }]"
] ]
}, },
"execution_count": 11,
"metadata": {}, "metadata": {},
"execution_count": 11 "output_type": "execute_result"
} }
],
"source": [
"import os\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -19,7 +19,8 @@
 "source": [
     "import csv\n",
     "from typing import Optional\n",
-    "import httpx, json\n",
+    "import httpx\n",
+    "import json\n",
     "import asyncio\n",
     "\n",
     "proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",

View file

@ -14,16 +14,16 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"metadata": { "metadata": {
"id": "pc6IO4V99O25",
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
}, },
"id": "pc6IO4V99O25",
"outputId": "2d69da44-010b-41c2-b38b-5b478576bb8b" "outputId": "2d69da44-010b-41c2-b38b-5b478576bb8b"
}, },
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"Collecting litellm\n", "Collecting litellm\n",
" Downloading litellm-0.1.482-py3-none-any.whl (69 kB)\n", " Downloading litellm-0.1.482-py3-none-any.whl (69 kB)\n",
@ -134,11 +134,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 4,
"model_name = \"togethercomputer/CodeLlama-34b-Instruct\"\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -146,11 +142,10 @@
"id": "GIUevHlMvPb8", "id": "GIUevHlMvPb8",
"outputId": "ad930a12-16e3-4400-fff4-38151e4f6da5" "outputId": "ad930a12-16e3-4400-fff4-38151e4f6da5"
}, },
"execution_count": 4,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"\u001b[92mHere's your LiteLLM Dashboard 👉 \u001b[94m\u001b[4mhttps://admin.litellm.ai/6c0f0403-becb-44af-9724-7201c7d381d0\u001b[0m\n", "\u001b[92mHere's your LiteLLM Dashboard 👉 \u001b[94m\u001b[4mhttps://admin.litellm.ai/6c0f0403-becb-44af-9724-7201c7d381d0\u001b[0m\n",
"{\n", "{\n",
@ -174,6 +169,11 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"model_name = \"togethercomputer/CodeLlama-34b-Instruct\"\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
] ]
}, },
{ {
@@ -976,7 +976,6 @@
 "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
 "\n",
 "\n",
-"import asyncio\n",
 "async def parse_stream(stream):\n",
 "    async for elem in stream:\n",
 "        print(elem)\n",

View file

@ -1,51 +1,37 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "eKXncoQbU_2j"
},
"source": [ "source": [
"# Using Nemo-Guardrails with LiteLLM Server\n", "# Using Nemo-Guardrails with LiteLLM Server\n",
"\n", "\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)" "[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
], ]
"metadata": {
"id": "eKXncoQbU_2j"
}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [ "source": [
"## Using with Bedrock\n", "## Using with Bedrock\n",
"\n", "\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`" "`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
], ]
"metadata": {
"id": "ZciYaLwvuFbu"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"pip install nemoguardrails langchain"
],
"metadata": { "metadata": {
"id": "vOUwGSJ2Vsy3" "id": "vOUwGSJ2Vsy3"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"pip install nemoguardrails langchain"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -55,7 +41,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
"\n", "\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n", "llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
@ -73,6 +58,9 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [ "source": [
"## Using with TogetherAI\n", "## Using with TogetherAI\n",
"\n", "\n",
@ -80,15 +68,16 @@
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n", "`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n", "\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`" "2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
], ]
"metadata": {
"id": "vz5n00qyuKjp"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [ "source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
"\n", "\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n", "llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
@ -102,26 +91,26 @@
" \"role\": \"user\",\n", " \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n", " \"content\": \"Hello! What can you do for me?\"\n",
"}])" "}])"
], ]
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [ "source": [
"### CONFIG.YML\n", "### CONFIG.YML\n",
"\n", "\n",
"save this example `config.yml` in your current directory" "save this example `config.yml` in your current directory"
], ]
"metadata": {
"id": "8A1KWKnzuxAS"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [ "source": [
"# instructions:\n", "# instructions:\n",
"# - type: general\n", "# - type: general\n",
@ -148,12 +137,21 @@
"# - type: main\n", "# - type: main\n",
"# engine: openai\n", "# engine: openai\n",
"# model: claude-instant-1" "# model: claude-instant-1"
]
}
], ],
"metadata": { "metadata": {
"id": "NKN1GmSvu0Cx" "colab": {
"provenance": []
}, },
"execution_count": null, "kernelspec": {
"outputs": [] "display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
} }
] },
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,16 +1,12 @@
-import sys, os
-import traceback
 from dotenv import load_dotenv
 load_dotenv()
 import litellm
-from litellm import embedding, completion, completion_cost
 from autoevals.llm import *
 ###################
-import litellm
 # litellm completion call
 question = "which country has the highest population"

View file

@@ -1,11 +1,12 @@
 import traceback
-from flask import Flask, request, jsonify, abort, Response
+from flask import Flask, request, Response
 from flask_cors import CORS
-import traceback
 import litellm
 from util import handle_error
 from litellm import completion
-import os, dotenv, time
+import os
+import dotenv
+import time
 import json
 dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
 # litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
 ######### PROMPT LOGGING ##########
-os.environ[
-    "PROMPTLAYER_API_KEY"
-] = ""  # set your promptlayer key here - https://promptlayer.com/
+os.environ["PROMPTLAYER_API_KEY"] = (
+    ""  # set your promptlayer key here - https://promptlayer.com/
+)
 # set callbacks
 litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
     try:
         if "prompt" not in data:
             raise ValueError("data needs to have prompt")
-        data[
-            "model"
-        ] = "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        data["model"] = (
+            "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        )
         # COMPLETION CALL
         system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
         messages = [
@@ -75,7 +76,7 @@ def api_completion():
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             return Response(data_generator(response), mimetype="text/event-stream")
-    except Exception as e:
+    except Exception:
         # call handle_error function
         print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
         ## LOG FAILURE

View file

@@ -1,5 +1,4 @@
 import requests
-from urllib.parse import urlparse, parse_qs
 
 def get_next_url(response):

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [ "source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n", "# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n", "\n",
@ -30,10 +19,7 @@
"model = \"q841o8w\" # baseten model version ID\n", "model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n", "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```" "```"
], ]
"metadata": {
"id": "gZx-wHJapG5w"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -49,55 +35,50 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": { "metadata": {
"id": "VEukLhDzo4vw" "id": "VEukLhDzo4vw"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from litellm import completion"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": { "metadata": {
"id": "4STYM2OHFNlc" "id": "4STYM2OHFNlc"
} },
"source": [
"## Setup"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 21,
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": { "metadata": {
"id": "DorpLxw1FHbC" "id": "DorpLxw1FHbC"
}, },
"execution_count": 21, "outputs": [],
"outputs": [] "source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [ "source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n", "## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "syF3dTdKFSQQ"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 18,
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -105,18 +86,16 @@
"id": "rPgSoMlsojz0", "id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050" "outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
}, },
"execution_count": 18,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -127,28 +106,30 @@
" 'model': 'qvv0xeq'}" " 'model': 'qvv0xeq'}"
] ]
}, },
"execution_count": 18,
"metadata": {}, "metadata": {},
"execution_count": 18 "output_type": "execute_result"
} }
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [ "source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n", "## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "7n21UroEGCGa"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 19,
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -156,18 +137,16 @@
"id": "uLVWFH899lAF", "id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d" "outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
}, },
"execution_count": 19,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -178,28 +157,30 @@
" 'model': 'q841o8w'}" " 'model': 'q841o8w'}"
] ]
}, },
"execution_count": 19,
"metadata": {}, "metadata": {},
"execution_count": 19 "output_type": "execute_result"
} }
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [ "source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n", "## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "6-TFwmPAGPXq"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 20,
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -207,18 +188,16 @@
"id": "gbeYZOrUE_Bp", "id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a" "outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
}, },
"execution_count": 20,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -229,10 +208,30 @@
" 'model': '31dxrj3'}" " 'model': '31dxrj3'}"
] ]
}, },
"execution_count": 20,
"metadata": {}, "metadata": {},
"execution_count": 20 "output_type": "execute_result"
} }
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [ "source": [
"# Langchain liteLLM Demo Notebook\n", "# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n", "## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
@ -30,10 +19,7 @@
"ChatLiteLLM(model=\"command-nightly\")\n", "ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```" "```"
], ]
"metadata": {
"id": "5hwntUxTMxEk"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -48,25 +34,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": { "metadata": {
"id": "MOhRaVnhB-0J" "id": "MOhRaVnhB-0J"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n", "os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n", "chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
@ -76,30 +76,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n", "os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n", "chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
@ -109,30 +109,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n", "os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
@ -142,30 +142,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['COHERE_API_KEY'] = \"\"\n", "os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n", "chat = ChatLiteLLM(model=\"command-nightly\")\n",
@ -175,27 +175,21 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
]
}
], ],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "provenance": []
}, },
"id": "tZxpq5PDDY9Y", "kernelspec": {
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666" "display_name": "Python 3",
"name": "python3"
}, },
"execution_count": 30, "language_info": {
"outputs": [ "name": "python"
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
} }
] },
} "nbformat": 4,
] "nbformat_minor": 0
} }

View file

@@ -43,7 +43,7 @@
 "source": [
     "# set you Vertex AI configs\n",
     "import litellm\n",
-    "from litellm import embedding, completion\n",
+    "from litellm import completion\n",
     "\n",
     "litellm.vertex_project = \"hardy-device-386718\"\n",
     "litellm.vertex_location = \"us-central1\""

View file

@ -1,80 +1,71 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [ "source": [
"## Demo Notebook of Function Calling with liteLLM\n", "## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n", "- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n", " - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`" "- In this notebook we use function calling with `litellm.completion()`"
], ]
"metadata": {
"id": "vnvlwUDZK7VA"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": { "metadata": {
"id": "KrINCwRfLgZV" "id": "KrINCwRfLgZV"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"## Install liteLLM\n",
"!pip install litellm"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os, litellm\n",
"from litellm import completion"
],
"metadata": { "metadata": {
"id": "nK7zR5OgLlh2" "id": "nK7zR5OgLlh2"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from litellm import completion"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 27,
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": { "metadata": {
"id": "dCQlyBxKLqbA" "id": "dCQlyBxKLqbA"
}, },
"execution_count": 27, "outputs": [],
"outputs": [] "source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [ "source": [
"## Define Messages, Functions\n", "## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n", "We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n", "\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates" "See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
], ]
"metadata": {
"id": "gfdGv-FMRCdX"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [ "source": [
"messages = [\n", "messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n", " {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
@ -104,28 +95,20 @@
" }\n", " }\n",
" }\n", " }\n",
" ]" " ]"
], ]
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": { "metadata": {
"id": "NX6by2VuRPnp" "id": "NX6by2VuRPnp"
} },
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 9,
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -133,11 +116,10 @@
"id": "QVoJ5PtxMlVx", "id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9" "outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
}, },
"execution_count": 9,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"{\n", "{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n", " \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
@ -166,24 +148,25 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [ "source": [
"## Parse GPT 3.5 Response\n", "## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call" "Read Information about what Function to Call"
], ]
"metadata": {
"id": "Yu0o2saDNLx8"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 11,
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -191,10 +174,8 @@
"id": "u1DzXLJsNOR5", "id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79" "outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
}, },
"execution_count": 11,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n", "<OpenAIObject at 0x7922c70ce930> JSON: {\n",
@ -203,20 +184,19 @@
"}" "}"
] ]
}, },
"execution_count": 11,
"metadata": {}, "metadata": {},
"execution_count": 11 "output_type": "execute_result"
} }
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 20,
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -224,33 +204,35 @@
"id": "tYb96Mh0NhH9", "id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f" "outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
}, },
"execution_count": 20,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"get_current_weather {'location': 'Boston, MA'}\n" "get_current_weather {'location': 'Boston, MA'}\n"
] ]
} }
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": { "metadata": {
"id": "z3tstH_yN3fX" "id": "z3tstH_yN3fX"
} },
"source": [
"## Call the get_current_weather() function"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 24,
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -258,37 +240,33 @@
"id": "TSb8JHhgN5Zc", "id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c" "outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
}, },
"execution_count": 24,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"12F\n" "12F\n"
] ]
} }
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": { "metadata": {
"id": "k4HGJE3NRmMI" "id": "k4HGJE3NRmMI"
} },
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 26,
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -296,11 +274,10 @@
"id": "a23cmEwiPaw7", "id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21" "outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
}, },
"execution_count": 26,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"{\n", "{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n", " \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
@ -325,7 +302,30 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,13 +1,13 @@
 import openai
-api_base = f"http://0.0.0.0:8000"
+api_base = "http://0.0.0.0:8000"
 openai.api_base = api_base
 openai.api_key = "temp-key"
 print(openai.api_base)
-print(f"LiteLLM: response from proxy with streaming")
+print("LiteLLM: response from proxy with streaming")
 response = openai.ChatCompletion.create(
     model="ollama/llama2",
     messages=[

File diff suppressed because one or more lines are too long

View file

@ -1,29 +1,15 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"!pip install litellm"
],
"metadata": { "metadata": {
"id": "j6yJsCGeaq8G" "id": "j6yJsCGeaq8G"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"!pip install litellm"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@@ -33,8 +19,7 @@
 },
 "outputs": [],
 "source": [
-    "import litellm\n",
-    "from litellm import embedding, completion\n",
+    "from litellm import completion\n",
     "\n",
     "model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
     "\n",
@@ -44,9 +29,23 @@
     "for model in model_fallback_list:\n",
     "    try:\n",
     "        response = completion(model=model, messages=messages)\n",
-    "    except Exception as e:\n",
+    "    except Exception:\n",
     "        print(f\"error occurred: {traceback.format_exc()}\")"
 ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,14 +1,12 @@
-import sys, os
-import traceback
+import sys
+import os
 from dotenv import load_dotenv
 load_dotenv()
-import os, io
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
 from litellm import Router
 import litellm
@@ -137,7 +135,7 @@ for future in futures:
     else:
         failed_calls += 1
-print(f"Load test Summary:")
+print("Load test Summary:")
 print(f"Total Requests: {concurrent_calls}")
 print(f"Successful Calls: {successful_calls}")
 print(f"Failed Calls: {failed_calls}")

View file

@@ -1,14 +1,12 @@
-import sys, os
-import traceback
+import sys
+import os
 from dotenv import load_dotenv
 load_dotenv()
-import os, io
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
 from litellm import Router
 import litellm
@@ -160,7 +158,7 @@ for future in futures:
     else:
         failed_calls += 1
-print(f"Load test Summary:")
+print("Load test Summary:")
 print(f"Total Requests: {concurrent_calls}")
 print(f"Successful Calls: {successful_calls}")
 print(f"Failed Calls: {failed_calls}")


@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -132,7 +130,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")


@ -1,14 +1,9 @@
from fastapi import FastAPI from fastapi import FastAPI
import uvicorn import uvicorn
from memory_profiler import profile, memory_usage from memory_profiler import profile
import os import os
import traceback
import asyncio
import pytest
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,14 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import asyncio import asyncio
from litellm import Router, Timeout from litellm import Timeout
import time import time
from litellm.caching.caching import Cache
import litellm
import openai import openai
### Test just calling AsyncAzureOpenAI ### Test just calling AsyncAzureOpenAI


@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(


@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(


@ -1,5 +1,4 @@
import requests import requests
import json
def get_initial_config(): def get_initial_config():


@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or "" litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name: if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model) print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue continue
for param, value in litellm_params.items(): for param, value in litellm_params.items():


@ -1,7 +1,6 @@
import os import os
from openai import OpenAI from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
import httpx
import concurrent.futures import concurrent.futures
load_dotenv() load_dotenv()


@ -2,21 +2,16 @@
import json import json
import boto3 import boto3
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io import io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator: class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True, "stream": True,
} }
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2") client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream( response = client.invoke_endpoint_with_response_stream(


@ -111,7 +111,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import mlflow\n",
"mlflow.langchain.autolog()" "mlflow.langchain.autolog()"
] ]
}, },


@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
""" """
import asyncio import asyncio
import os
# Enter your DATABASE_URL here # Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view # Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e: except Exception:
# If an error occurs, the view does not exist, so create it # If an error occurs, the view does not exist, so create it
await db.execute_raw( await db.execute_raw(
""" """
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa print("Last30dKeysBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa print("Last30dModelsBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1""" """SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
) )
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""") await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa print("DailyTagSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
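The view-creation script above also loses the "as e" binding on every except block: the exception object was never referenced, so ruff's unused-variable check (most likely F841) flags it and the handlers become bare "except Exception:". An illustrative stand-in for those handlers, not the actual helper from the script:

async def view_exists(db, view_name: str) -> bool:
    # Probe the view with a cheap SELECT; the bound exception name was dropped
    # because nothing in the handler ever used it.
    try:
        await db.query_raw(f'SELECT 1 FROM "{view_name}" LIMIT 1')
        return True
    except Exception:  # previously: except Exception as e:
        return False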


@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data # For now, just printing the received data
return {"message": "Request received successfully"} return {"message": "Request received successfully"}
except Exception as e: except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error") raise HTTPException(status_code=500, detail="Internal Server Error")


@ -2,12 +2,10 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import dotenv, os import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional from typing import Optional
import traceback import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import dotenv, os import litellm
import traceback import uuid
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger


@ -11,9 +11,9 @@ import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any from typing import Optional, Literal, Any
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str, convert_litellm_response_object_to_str,
) )
from typing import List from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
import httpx
import json import json
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
""" """
Use this for the post call moderation with Guardrails Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True: if self.should_run_guardrail(data=data, event_type=event_type) is not True:


@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords ## Reject a call / response if it contains certain keywords
from typing import Optional, Literal from typing import Literal
import litellm import litellm
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger): class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call - check if user id part of call
- check if user id part of blocked list - check if user id part of blocked list
""" """
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook") self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data: if call_type == "completion" and "messages" in data:
for m in data["messages"]: for m in data["messages"]:
if "content" in m and isinstance(m["content"], str): if "content" in m and isinstance(m["content"], str):


@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger): class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache - check if end-user in cache
- check if end-user in db - check if end-user in db
""" """
self.print_verbose(f"Inside Blocked User List Pre-Call Hook") self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data: if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", "")) user = data.get("user_id", data.get("user", ""))
if ( if (


@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger): class _ENTERPRISE_GoogleTextModeration(CustomLogger):


@ -7,28 +7,24 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
from collections.abc import Iterable from collections.abc import Iterable
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.types.utils import ( from litellm.types.utils import (
ModelResponse, ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices, Choices,
) )
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True litellm.set_verbose = True


@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy ## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation", "moderation",
"audio_transcription", "audio_transcription",
] ]
except Exception as e: except Exception:
self.print_verbose( self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']" f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
) )


@ -5,27 +5,19 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True litellm.set_verbose = True


@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return


@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints # Enterprise Proxy Util Endpoints
from typing import Optional, List from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list): def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta from datetime import timedelta
if len(data) == 0: if len(data) == 0:
return { return {


@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import ( from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig, AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings, bedrock_tool_name_mappings,
) )
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import ( from .llms.bedrock.common_utils import (
AmazonTitanConfig, AmazonTitanConfig,
AmazonAI21Config, AmazonAI21Config,


@ -1,7 +1,6 @@
import json import json
import logging import logging
import os import os
import traceback
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter


@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os import os
from typing import Dict, List, Optional, Union from typing import List, Optional, Union
import redis # type: ignore import redis # type: ignore
import redis.asyncio as async_redis # type: ignore import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str from litellm import get_secret, get_secret_str
from ._logging import verbose_logger from ._logging import verbose_logger


@ -1,23 +1,12 @@
# What is this? # What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format ## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback import traceback
import uuid from typing import Any, Optional
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
import litellm import litellm
from litellm import ChatCompletionRequest, verbose_logger from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import ( from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse


@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ( from litellm.utils import (
exception_type, exception_type,


@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models. This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds. It sends requests concurrently and returns the response from the first model that responds.
""" """
import concurrent
if "model" in kwargs: if "model" in kwargs:
kwargs.pop("model") kwargs.pop("model")
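For context on the function touched above: its docstring describes a first-response-wins fan-out across models. A generic sketch of that pattern (not litellm's actual implementation, just the idea the docstring states):

from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait

def first_response(fns):
    # Run all callables in parallel and return whichever finishes first.
    with ThreadPoolExecutor() as pool:
        futures = [pool.submit(fn) for fn in fns]
        done, _ = wait(futures, return_when=FIRST_COMPLETED)
        return next(iter(done)).result()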


@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx import httpx
import litellm import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import ( from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
VertexAIBatchPrediction, from litellm.secret_managers.main import get_secret_str
) from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout


@ -11,7 +11,7 @@ import json
import os import os
import threading import threading
import time import time
from typing import Literal, Optional, Union from typing import Literal, Optional
import litellm import litellm
from litellm.utils import ModelResponse from litellm.utils import ModelResponse


@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import ast import ast
import asyncio
import hashlib import hashlib
import inspect
import io
import json import json
import logging
import time import time
import traceback import traceback
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import ( from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
from .disk_cache import DiskCache from .disk_cache import DiskCache
from .dual_cache import DualCache from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache from .redis_cache import RedisCache
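One import in the caching package is deliberately kept, presumably because the module itself no longer references it directly: DualCache gains a "# noqa" marker so ruff's unused-import check leaves the public re-export in place. A sketch of that pattern for a package __init__.py, with illustrative names (the relative import only works inside a package):

from .dual_cache import DualCache  # noqa: F401  - intentional re-export
__all__ = ["DualCache"]  # an alternative way to tell linters the name is public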


@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import ( from litellm.caching.caching import S3Cache
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.litellm_core_utils.logging_utils import ( from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks, _assemble_complete_response_from_streaming_chunks,
) )
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns: Returns:
Optional[Any]: Optional[Any]:
""" """
from litellm.utils import ( from litellm.utils import convert_to_model_response_object
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
if ( if (
call_type == CallTypes.acompletion.value call_type == CallTypes.acompletion.value


@ -1,8 +1,6 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:


@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger


@ -15,7 +15,6 @@ from typing import Any
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache


@ -13,7 +13,6 @@ import asyncio
import inspect import inspect
import json import json
import time import time
import traceback
from datetime import timedelta from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceTypes
from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs, **kwargs,
): ):
import redis
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging


@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs, **kwargs,
): ):
from redisvl.index import SearchIndex from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose( print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index" "redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
# query # query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs): async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router from litellm.proxy.proxy_server import llm_model_list, llm_router


@ -12,11 +12,9 @@ Has 4 methods:
import ast import ast
import asyncio import asyncio
import json import json
from typing import Any, Optional from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs) self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
import boto3
import botocore import botocore
try: try:


@ -1,7 +1,6 @@
# What is this? # What is this?
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second, cost_per_second as openai_cost_per_second,
) )
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import ( from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator, cost_calculator as vertex_ai_image_cost_calculator,
) )
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import ( from litellm.utils import (
CallTypes, CallTypes,


@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx import httpx
import litellm import litellm
from litellm import client, get_secret_str from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import ( from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
VertexAIFilesHandler,
)
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
Batch,
CreateFileRequest, CreateFileRequest,
FileContentRequest, FileContentRequest,
FileTypes, FileTypes,


@ -19,10 +19,10 @@ import httpx
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import * from litellm.types.router import *
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout


@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults see custom_batch_logger.py for more details / defaults
""" """
import os from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from litellm._logging import verbose_logger, verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
if TYPE_CHECKING: if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
import json
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:


@ -4,16 +4,10 @@ import asyncio
import datetime import datetime
import os import os
import random import random
import threading
import time import time
import traceback from datetime import timedelta
from datetime import datetime as dt from typing import Any, Dict, List, Literal, Optional, Union
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
import aiohttp
import dotenv
from openai import APIError from openai import APIError
import litellm import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import ( from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert, _add_key_name_and_team_to_alert,
) )
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import ( from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.router import Router from litellm.router import Router
from litellm.types.integrations.slack_alerting import * from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import * from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types: if alert_type not in self.alert_types:
return return
import json
from datetime import datetime from datetime import datetime
# Get the current timestamp # Get the current timestamp


@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio import asyncio
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret from litellm.secret_managers.main import get_secret


@ -6,14 +6,9 @@ import asyncio
import json import json
import os import os
import random import random
import time
import traceback
import types import types
import uuid from typing import Any, Dict, List, Optional
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS, SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject, ArgillaCredentialsObject,
ArgillaItem, ArgillaItem,
ArgillaPayload,
) )
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload


@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
""" """
import json import json
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj): def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import ( from litellm.integrations._types.open_inference import (
MessageAttributes, MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues, OpenInferenceSpanKindValues,
SpanAttributes, SpanAttributes,
) )


@ -3,23 +3,8 @@ import json
import os import os
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from re import S, T from typing import List, Optional
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger


@ -2,15 +2,10 @@
## Log success + failure events to Braintrust ## Log success + failure events to Braintrust
import copy import copy
import json
import os import os
import threading
import traceback
import uuid
from datetime import datetime from datetime import datetime
from typing import Literal, Optional from typing import Optional
import dotenv
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.utils import get_formatted_prompt, print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client( global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback llm_provider=httpxSpecialProvider.LoggingCallback


@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio import asyncio
import time import time
from typing import List, Literal, Optional from typing import List, Optional
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger


@ -1,4 +1,4 @@
from typing import List, Literal, Optional from typing import List, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger


@ -1,18 +1,14 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import os
import traceback import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel from pydantic import BaseModel
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
AdapterCompletionStreamWrapper, AdapterCompletionStreamWrapper,
EmbeddingResponse, EmbeddingResponse,


@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio import asyncio
import datetime import datetime
import os import os
import sys
import traceback import traceback
import uuid import uuid
from datetime import datetime as datetimeObj from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union from typing import Any, List, Optional, Union
from httpx import Response from httpx import Response
@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import * from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload


@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio import asyncio
import os import os
import traceback
import uuid import uuid
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional
from httpx import Response
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger


@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import traceback import traceback
import uuid import uuid
from typing import Any from typing import Any
import dotenv
import litellm import litellm


@ -2,7 +2,6 @@
Functions for sending Email Alerts Functions for sending Email Alerts
""" """
import asyncio
import os import os
from typing import List, Optional from typing import List, Optional
@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
) )
if team_id is None: if team_id is None:
return [] return []
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import prisma_client
if prisma_client is None: if prisma_client is None:
raise Exception("Not connected to DB!") raise Exception("Not connected to DB!")
@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id _team_id = webhook_event.team_id


@ -1,15 +1,12 @@
import os import os
from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )


@ -1,27 +1,14 @@
import asyncio import asyncio
import json
import os import os
import uuid import uuid
from datetime import datetime from datetime import datetime
from re import S from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase


@ -1,13 +1,7 @@
import json import json
import os import os
import uuid from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger. This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it. It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
""" """
from litellm.llms.vertex_ai.vertex_llm_base import ( from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
VertexBase,
)
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials) _in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances: if _in_memory_key not in self.vertex_instances:


@ -3,10 +3,7 @@
import os import os
import traceback import traceback
import dotenv
import litellm import litellm
from litellm._logging import verbose_logger
class HeliconeLogger: class HeliconeLogger:


@@ -3,11 +3,9 @@
 import json
 import os
-import traceback
 import uuid
 from typing import Literal, Optional
-import dotenv
 import httpx
 import litellm

View file

@@ -3,7 +3,6 @@
 import copy
 import os
 import traceback
-import types
 from collections.abc import MutableMapping, MutableSequence, MutableSet
 from typing import TYPE_CHECKING, Any, Dict, Optional, cast

View file

@@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
 Handles Key/Team Based Langfuse Logging
 """
-import os
 from typing import TYPE_CHECKING, Any, Dict, Optional
-from packaging.version import Version
 from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
 from .langfuse import LangFuseLogger, LangfuseLoggingConfig

View file

@@ -3,14 +3,12 @@
 import asyncio
 import os
 import random
-import time
 import traceback
 import types
 import uuid
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional, TypedDict, Union
-import dotenv  # type: ignore
+from typing import Any, Dict, List, Optional
 import httpx
 from pydantic import BaseModel  # type: ignore
@@ -18,7 +16,6 @@ import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.custom_httpx.http_handler import (
-    AsyncHTTPHandler,
     get_async_httpx_client,
     httpxSpecialProvider,
 )

View file

@@ -1,9 +1,7 @@
-import traceback
 import json
-from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import SpanAttributes
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any
+from litellm.proxy._types import SpanAttributes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

View file

@@ -3,17 +3,12 @@
 import json
 import os
-import traceback
-import uuid
-import dotenv
 import httpx
 import litellm
-from litellm import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.custom_httpx.http_handler import (
-    AsyncHTTPHandler,
     HTTPHandler,
     get_async_httpx_client,
     httpxSpecialProvider,

View file

@@ -1,7 +1,6 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
-from functools import wraps
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 import litellm
@@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.types.services import ServiceLoggerPayload
 from litellm.types.utils import (
     ChatCompletionMessageToolCall,
-    EmbeddingResponse,
     Function,
-    ImageResponse,
-    ModelResponse,
     StandardLoggingPayload,
 )
@@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
         end_time: Optional[Union[datetime, float]] = None,
         event_metadata: Optional[dict] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
         end_time: Optional[Union[float, datetime]] = None,
         event_metadata: Optional[dict] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
             span.set_attribute(key, primitive_value)
     def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
-        from litellm.proxy._types import SpanAttributes
         kwargs.get("optional_params", {})
         litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

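Several of the hunks above drop a function-local `from datetime import datetime`. These removals are safe because the module already imports `datetime` at the top (visible as a context line in the first hunk), so the name is in scope everywhere below; the inner import only rebinds it locally. A tiny illustration:

from datetime import datetime  # module-level import makes the name available everywhere below

def current_timestamp() -> str:
    # No re-import needed here: the module-level binding is already in scope.
    return datetime.now().isoformat()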
View file

@@ -3,8 +3,6 @@ import os
 import time
 from typing import Dict, Final, List, Optional
-from litellm.types.utils import ModelResponse
 CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"

View file

@@ -1,15 +1,10 @@
 # used for /metrics endpoint on LiteLLM Proxy
 #### What this does ####
 # On success, log events to Prometheus
-import os
-import subprocess
 import sys
-import traceback
-import uuid
-from datetime import date, datetime, timedelta
-from typing import Optional, TypedDict, Union
-import litellm
+from datetime import datetime, timedelta
+from typing import Optional
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

View file

@@ -2,13 +2,10 @@
 Helper functions to query prometheus API
 """
-import asyncio
-import os
 import time
 from datetime import datetime, timedelta
 from typing import Optional
-import litellm
 from litellm import get_secret
 from litellm._logging import verbose_logger
 from litellm.llms.custom_httpx.http_handler import (

View file

@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers) # On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceLoggerPayload, ServiceTypes

View file

@@ -1,12 +1,6 @@
 #### What this does ####
 # On success + failure, log events to Supabase
-import datetime
-import os
-import subprocess
-import sys
-import traceback
-import uuid
 from typing import Optional
 import litellm

View file

@@ -1,14 +1,11 @@
 #### What this does ####
 # On success + failure, log events to Supabase
-import datetime
 import os
 import subprocess
 import sys
 import traceback
-import dotenv
 import litellm

View file

@@ -1,6 +1,5 @@
 import traceback
-import litellm
 from litellm._logging import verbose_logger
@@ -12,9 +11,7 @@ class TraceloopLogger:
     def __init__(self):
         try:
-            from opentelemetry.sdk.trace.export import ConsoleSpanExporter
             from traceloop.sdk import Traceloop
-            from traceloop.sdk.instruments import Instruments
             from traceloop.sdk.tracing.tracing import TracerWrapper
         except ModuleNotFoundError as e:
             verbose_logger.error(
@@ -39,7 +36,6 @@ class TraceloopLogger:
         level="DEFAULT",
         status_message=None,
     ):
-        from opentelemetry import trace
         from opentelemetry.semconv.ai import SpanAttributes
         from opentelemetry.trace import SpanKind, Status, StatusCode
@@ -78,7 +74,7 @@ class TraceloopLogger:
                 )
             if "top_p" in optional_params:
                 span.set_attribute(
-                    SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
+                    SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
                 )
             if "tools" in optional_params or "functions" in optional_params:
                 span.set_attribute(

View file

@@ -173,16 +173,14 @@ except Exception:
 #### What this does ####
 # On success, logs events to Langfuse
-import os
 import traceback
-from datetime import datetime
 class WeightsBiasesLogger:
     # Class variables or attributes
     def __init__(self):
         try:
-            import wandb
+            pass
         except Exception:
             raise Exception(
                 "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"

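In the hunk above, the probe `import wandb` inside the `try` block becomes `pass`, so the `except` branch can no longer fire when the package is missing. If the intent were to keep the availability check while still avoiding an unused-import finding, one option (a sketch of an alternative, not what this commit does) is to probe for the module without binding a name:

import importlib.util

def require_wandb() -> None:
    # Detect the package without importing a name that would otherwise go unused.
    if importlib.util.find_spec("wandb") is None:
        raise Exception(
            "wandb not installed, try running 'pip install wandb' to fix this error"
        )

Another common choice is to keep `import wandb` and mark it with `# noqa: F401` so the linter treats the import as intentional.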
View file

@@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
 import anyio
 import anyio.to_thread
-from anyio import to_thread
 from typing_extensions import ParamSpec, TypeVar
 T_ParamSpec = ParamSpec("T_ParamSpec")

View file

@@ -1,7 +1,6 @@
 # What is this?
 ## Helper utilities
-import os
-from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 import httpx

View file

@@ -1,6 +1,4 @@
 import json
-import os
-import threading
 import traceback
 from typing import Optional
@@ -14,17 +12,14 @@ from ..exceptions import (
     APIError,
     AuthenticationError,
     BadRequestError,
-    BudgetExceededError,
     ContentPolicyViolationError,
     ContextWindowExceededError,
     NotFoundError,
-    OpenAIError,
     PermissionDeniedError,
     RateLimitError,
     ServiceUnavailableError,
     Timeout,
     UnprocessableEntityError,
-    UnsupportedParamsError,
 )

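Trimming names from an import like the one above is safe when the module never references them. The case to watch for is a module that imports names only to re-export them (for example a package `__init__`): pyflakes/ruff treat a name as an intentional re-export when it is listed in `__all__` or imported with a redundant alias, and F401 will not flag it. A hedged sketch (hypothetical module, not taken from this diff):

# mypackage/__init__.py
from .exceptions import BudgetExceededError as BudgetExceededError  # redundant alias marks an intentional re-export

__all__ = ["BudgetExceededError"]  # listing the name in __all__ works as well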
Some files were not shown because too many files have changed in this diff.
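For reference, a rule like this is typically enforced by selecting pyflakes' F401 ("imported but unused") check in ruff; the exact configuration used in this repo is not shown in the excerpt above, but a minimal, illustrative setup might look like:

# pyproject.toml (illustrative)
[tool.ruff.lint]
extend-select = ["F401"]

# one-off cleanup with autofix:
#   ruff check --select F401 --fix .

Running the same check in CI keeps newly added unused imports from creeping back in.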