(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
Ishaan Jaff 2024-12-19 12:33:42 -08:00 committed by GitHub
parent 5e344497ce
commit c7f14e936a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
347 changed files with 5473 additions and 7207 deletions
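For reference, Ruff's unused-import check is rule F401 (from pyflakes). Below is a minimal sketch of how such a rule can be enabled, assuming a pyproject.toml-based setup — the exact configuration and any per-file exemptions used in this PR are not shown on this page, so treat the option placement as an assumption:

# pyproject.toml — hypothetical sketch, not the repo's actual config
[tool.ruff.lint]
extend-select = ["F401"]            # F401: unused-import

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]            # re-exports in __init__.py are commonly exempted

Running `ruff check --select F401 --fix .` then removes the flagged imports automatically, which is the kind of change reflected in the diffs below.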


@@ -1,6 +1,4 @@
-from locust import HttpUser, task, between, events
-import json
-import time
+from locust import HttpUser, task, between
 
 
 class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
             # Include any additional headers you may need for authentication, etc.
         }

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,423 +1,422 @@
[Notebook diff — "LiteLLM - Azure OpenAI + OpenAI Calls". The notebook JSON is renormalized: the top-level "metadata", "nbformat" and "nbformat_minor" keys move below "cells", and cell keys are reordered to cell_type / execution_count / metadata / outputs / source. The substantive change is the removal of unused imports from two code cells:]

 Cell "mnveHO5dfcB0":
-      "import os, litellm"
+      "import os"

 Cell "5OSosWNCfc_2" ("Completion - Quick start"):
-      "import os\n",
       "from litellm import completion\n",

[The markdown cells, the streaming / threading / stress-test / same-thread cells, and the recorded outputs are unchanged.]

File diff suppressed because one or more lines are too long


@@ -1,166 +1,163 @@
[Notebook diff — "LiteLLM Batch Completions Example". Same JSON renormalization (top-level metadata/nbformat moved below "cells", cell keys reordered). Unused imports are removed from the two code cells:]

 "## Import Batch Completion" cell:
-      "import litellm\n",
       "import os\n",
       "from litellm import batch_completion\n",

 "## Calling `litellm.batch_completion`" cell:
-      "import litellm\n",
       "import os\n",
-      "from litellm import batch_completion\n",
       "\n",
       "os.environ['ANTHROPIC_API_KEY'] = \"\"\n",

[The markdown cells and the recorded ModelResponse output are unchanged.]


@@ -1,204 +1,205 @@
[Notebook diff — proxy batch "Create Users + Keys" notebook ("Environment Setup" / "Import Sheet" / "Create Users + Keys"). The only change is in the setup cell, where a combined import is split onto separate lines:]

-      "import httpx, json\n",
+      "import httpx\n",
+      "import json\n",

[The HTTPHandler class, import_sheet(), and create_user()/create_key_with_alias() cells are unchanged.]

File diff suppressed because it is too large


@@ -1,159 +1,157 @@
[Notebook diff — "Using Nemo-Guardrails with LiteLLM Server". The notebook JSON is renormalized (top-level metadata/nbformat moved below "cells", cell keys reordered), and the unused "import openai" line is dropped from both example cells:]

 Bedrock and TogetherAI cells:
-      "import openai\n",
       "from langchain.chat_models import ChatOpenAI\n",

[The docker run instructions, the config.yml example cell, and the markdown cells are unchanged.]


@@ -1,16 +1,12 @@
-import sys, os
-import traceback
 from dotenv import load_dotenv
 load_dotenv()
 import litellm
-from litellm import embedding, completion, completion_cost
 from autoevals.llm import *
 ###################
-import litellm
 # litellm completion call
 question = "which country has the highest population"


@@ -1,11 +1,12 @@
 import traceback
-from flask import Flask, request, jsonify, abort, Response
+from flask import Flask, request, Response
 from flask_cors import CORS
-import traceback
 import litellm
 from util import handle_error
 from litellm import completion
-import os, dotenv, time
+import os
+import dotenv
+import time
 import json
 
 dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
 # litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
 
 ######### PROMPT LOGGING ##########
-os.environ[
-    "PROMPTLAYER_API_KEY"
-] = ""  # set your promptlayer key here - https://promptlayer.com/
+os.environ["PROMPTLAYER_API_KEY"] = (
+    ""  # set your promptlayer key here - https://promptlayer.com/
+)
 
 # set callbacks
 litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
     try:
         if "prompt" not in data:
             raise ValueError("data needs to have prompt")
-        data[
-            "model"
-        ] = "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        data["model"] = (
+            "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        )
         # COMPLETION CALL
         system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
         messages = [
@@ -75,7 +76,7 @@ def api_completion():
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             return Response(data_generator(response), mimetype="text/event-stream")
-    except Exception as e:
+    except Exception:
         # call handle_error function
         print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
         ## LOG FAILURE


@@ -1,5 +1,4 @@
 import requests
-from urllib.parse import urlparse, parse_qs
 
 
 def get_next_url(response):


@ -1,238 +1,237 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "gZx-wHJapG5w"
}, },
"kernelspec": { "source": [
"name": "python3", "# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"display_name": "Python 3" "\n",
}, "* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"language_info": { "* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"name": "python" "* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
} "\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
}, },
"cells": [ {
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{ {
"cell_type": "markdown", "name": "stderr",
"source": [ "output_type": "stream",
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n", "text": [
"\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n", "INFO:baseten:API key set.\n"
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n", ]
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
}, },
{ {
"cell_type": "code", "data": {
"execution_count": null, "text/plain": [
"metadata": { "{'choices': [{'finish_reason': 'stop',\n",
"id": "4JSRa0QVogPo" " 'index': 0,\n",
}, " 'message': {'role': 'assistant',\n",
"outputs": [], " 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
"source": [ " 'created': 1692135883.699066,\n",
"!pip install litellm==0.1.399\n", " 'model': 'qvv0xeq'}"
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
] ]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
} }
] ],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
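For reference, a minimal standalone sketch of the Baseten calls the notebook above makes. The model version ID, the prompt, and the `BASETEN_API_KEY` environment variable name are assumptions for illustration; substitute your own deployment's version ID.

```python
import os

from litellm import completion

# Assumed environment variable name for the Baseten API key (placeholder value).
os.environ["BASETEN_API_KEY"] = ""

# Illustrative prompt; the notebook defines its own `messages` in an earlier cell.
messages = [{"role": "user", "content": "What is Baseten?"}]

# "qvv0xeq" is a placeholder Baseten model version ID - use your own deployment's ID.
response = completion(model="qvv0xeq", messages=messages, custom_llm_provider="baseten")
print(response["choices"][0]["message"]["content"])
```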

View file

@ -1,201 +1,195 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "5hwntUxTMxEk"
}, },
"kernelspec": { "source": [
"name": "python3", "# Langchain liteLLM Demo Notebook\n",
"display_name": "Python 3" "## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
}, "Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"language_info": { "\n",
"name": "python" "Call all LLM models using the same I/O interface\n",
} "\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "markdown", "execution_count": null,
"source": [ "metadata": {
"# Langchain liteLLM Demo Notebook\n", "id": "aPNAUsCvB6Sv"
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n", },
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n", "outputs": [],
"\n", "source": [
"Call all LLM models using the same I/O interface\n", "!pip install litellm langchain"
"\n", ]
"Example usage\n", },
"```python\n", {
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n", "cell_type": "code",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n", "execution_count": 2,
"ChatLiteLLM(model=\"command-nightly\")\n", "metadata": {
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "id": "MOhRaVnhB-0J"
"```" },
], "outputs": [],
"metadata": { "source": [
"id": "5hwntUxTMxEk" "import os\n",
} "from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
}, },
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{ {
"cell_type": "code", "data": {
"execution_count": null, "text/plain": [
"metadata": { "AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
] ]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
} }
] ],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
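A compact sketch consolidating the `ChatLiteLLM` cells above into a single loop. The API keys are placeholders, and only the providers you actually call need a key; the call shape stays identical across models.

```python
import os

from langchain.chat_models import ChatLiteLLM
from langchain.schema import HumanMessage

# Placeholder keys - fill in only the providers you actually call.
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
os.environ["COHERE_API_KEY"] = ""

messages = [HumanMessage(content="what model are you?")]

# Same I/O interface for every provider; only the model string changes.
for model in ["gpt-3.5-turbo", "claude-2", "command-nightly"]:
    chat = ChatLiteLLM(model=model)
    print(model, "->", chat(messages))
```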

View file

@ -43,7 +43,7 @@
"source": [ "source": [
"# set you Vertex AI configs\n", "# set you Vertex AI configs\n",
"import litellm\n", "import litellm\n",
"from litellm import embedding, completion\n", "from litellm import completion\n",
"\n", "\n",
"litellm.vertex_project = \"hardy-device-386718\"\n", "litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\"" "litellm.vertex_location = \"us-central1\""

View file

@ -1,331 +1,331 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "vnvlwUDZK7VA"
}, },
"kernelspec": { "source": [
"name": "python3", "## Demo Notebook of Function Calling with liteLLM\n",
"display_name": "Python 3" "- Supported Providers for Function Calling\n",
}, " - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"language_info": { "- In this notebook we use function calling with `litellm.completion()`"
"name": "python" ]
}
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "markdown", "execution_count": null,
"source": [ "metadata": {
"## Demo Notebook of Function Calling with liteLLM\n", "id": "KrINCwRfLgZV"
"- Supported Providers for Function Calling\n", },
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n", "outputs": [],
"- In this notebook we use function calling with `litellm.completion()`" "source": [
], "## Install liteLLM\n",
"metadata": { "!pip install litellm"
"id": "vnvlwUDZK7VA" ]
} },
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
}, },
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{ {
"cell_type": "code", "name": "stdout",
"source": [ "output_type": "stream",
"## Install liteLLM\n", "text": [
"!pip install litellm" "{\n",
], " \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
"metadata": { " \"object\": \"chat.completion\",\n",
"id": "KrINCwRfLgZV" " \"created\": 1691801223,\n",
}, " \"model\": \"gpt-3.5-turbo-0613\",\n",
"execution_count": null, " \"choices\": [\n",
"outputs": [] " {\n",
}, " \"index\": 0,\n",
{ " \"message\": {\n",
"cell_type": "code", " \"role\": \"assistant\",\n",
"source": [ " \"content\": null,\n",
"import os, litellm\n", " \"function_call\": {\n",
"from litellm import completion" " \"name\": \"get_current_weather\",\n",
], " \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"metadata": { " }\n",
"id": "nK7zR5OgLlh2" " },\n",
}, " \"finish_reason\": \"function_call\"\n",
"execution_count": 2, " }\n",
"outputs": [] " ],\n",
}, " \"usage\": {\n",
{ " \"prompt_tokens\": 82,\n",
"cell_type": "code", " \"completion_tokens\": 18,\n",
"source": [ " \"total_tokens\": 100\n",
"os.environ['OPENAI_API_KEY'] = \"\" #@param" " }\n",
], "}\n"
"metadata": { ]
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
} }
] ],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
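The function-calling notebook above walks through four steps: let the model pick a function, parse the `function_call`, execute it locally, then send the result back for a summary. A self-contained sketch of that round trip, assuming an OpenAI key is set; the toy `get_current_weather` mirrors the notebook's version, with a default `unit` added so model-supplied optional arguments don't break the call.

```python
import json
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = ""  # placeholder

def get_current_weather(location, unit="fahrenheit"):
    # Toy implementation from the notebook; `unit` default absorbs optional args.
    if location == "Boston, MA":
        return "The weather is 12F"
    return "unknown"

functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    }
]

messages = [{"role": "user", "content": "What is the weather like in Boston?"}]

# 1. Let the model decide which function to call.
response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
function_call = response["choices"][0]["message"]["function_call"]

# 2. Execute the chosen function with the model-supplied JSON arguments.
args = json.loads(function_call["arguments"])
result = get_current_weather(**args)

# 3. Send the function result back so the model can summarize it.
messages += [
    {
        "role": "assistant",
        "content": None,
        "function_call": {
            "name": function_call["name"],
            "arguments": function_call["arguments"],
        },
    },
    {"role": "function", "name": function_call["name"], "content": result},
]
final = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
print(final["choices"][0]["message"]["content"])
```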

View file

@ -1,13 +1,13 @@
import openai import openai
api_base = f"http://0.0.0.0:8000" api_base = "http://0.0.0.0:8000"
openai.api_base = api_base openai.api_base = api_base
openai.api_key = "temp-key" openai.api_key = "temp-key"
print(openai.api_base) print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming") print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
model="ollama/llama2", model="ollama/llama2",
messages=[ messages=[

File diff suppressed because one or more lines are too long

View file

@ -1,52 +1,51 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "code",
"colab": { "execution_count": null,
"provenance": [] "metadata": {
}, "id": "j6yJsCGeaq8G"
"kernelspec": { },
"name": "python3", "outputs": [],
"display_name": "Python 3" "source": [
}, "!pip install litellm"
"language_info": { ]
"name": "python"
}
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "code", "execution_count": null,
"source": [ "metadata": {
"!pip install litellm" "id": "u129iWNPaf72"
], },
"metadata": { "outputs": [],
"id": "j6yJsCGeaq8G" "source": [
}, "from litellm import completion\n",
"execution_count": null, "\n",
"outputs": [] "model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
}, "\n",
{ "user_message = \"Hello, how are you?\"\n",
"cell_type": "code", "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"execution_count": null, "\n",
"metadata": { "for model in model_fallback_list:\n",
"id": "u129iWNPaf72" " try:\n",
}, " response = completion(model=model, messages=messages)\n",
"outputs": [], " except Exception:\n",
"source": [ " print(f\"error occurred: {traceback.format_exc()}\")"
"import litellm\n", ]
"from litellm import embedding, completion\n", }
"\n", ],
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n", "metadata": {
"\n", "colab": {
"user_message = \"Hello, how are you?\"\n", "provenance": []
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n", },
"\n", "kernelspec": {
"for model in model_fallback_list:\n", "display_name": "Python 3",
" try:\n", "name": "python3"
" response = completion(model=model, messages=messages)\n", },
" except Exception as e:\n", "language_info": {
" print(f\"error occurred: {traceback.format_exc()}\")" "name": "python"
] }
} },
] "nbformat": 4,
"nbformat_minor": 0
} }
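A runnable version of the fallback loop shown in the notebook above. It adds the `import traceback` the `except` branch relies on, and a `break` on the first successful model, which is an assumption about the intended fallback behavior rather than something the original cell does.

```python
import traceback  # needed for traceback.format_exc() in the except branch

from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

response = None
for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # assumption: stop at the first model that succeeds
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")

print(response)
```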

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -137,7 +135,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -160,7 +158,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -132,7 +130,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,9 @@
from fastapi import FastAPI from fastapi import FastAPI
import uvicorn import uvicorn
from memory_profiler import profile, memory_usage from memory_profiler import profile
import os import os
import traceback
import asyncio
import pytest
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,14 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import asyncio import asyncio
from litellm import Router, Timeout from litellm import Timeout
import time import time
from litellm.caching.caching import Cache
import litellm
import openai import openai
### Test just calling AsyncAzureOpenAI ### Test just calling AsyncAzureOpenAI

View file

@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(

View file

@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(

View file

@ -1,5 +1,4 @@
import requests import requests
import json
def get_initial_config(): def get_initial_config():

View file

@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or "" litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name: if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model) print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue continue
for param, value in litellm_params.items(): for param, value in litellm_params.items():

View file

@ -1,7 +1,6 @@
import os import os
from openai import OpenAI from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
import httpx
import concurrent.futures import concurrent.futures
load_dotenv() load_dotenv()

View file

@ -2,21 +2,16 @@
import json import json
import boto3 import boto3
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io import io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator: class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True, "stream": True,
} }
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2") client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream( response = client.invoke_endpoint_with_response_stream(
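A rough sketch of the streaming SageMaker call the script above sets up. The endpoint name and the payload fields other than `"stream": True` are placeholders that depend on the deployed container; the script's `TokenIterator` additionally buffers and re-assembles these raw chunks into JSON lines, which this sketch skips.

```python
import json

import boto3

client = boto3.client("sagemaker-runtime", region_name="us-west-2")

payload = {
    "inputs": "Tell me a joke.",            # placeholder prompt field
    "parameters": {"max_new_tokens": 256},  # placeholder generation params
    "stream": True,
}

response = client.invoke_endpoint_with_response_stream(
    EndpointName="my-endpoint-name",        # placeholder endpoint name
    Body=json.dumps(payload),
    ContentType="application/json",
)

# The response body is an event stream; each event carries a PayloadPart of raw bytes.
for event in response["Body"]:
    chunk = event.get("PayloadPart", {}).get("Bytes", b"")
    if chunk:
        print(chunk.decode("utf-8"), end="")
```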

View file

@ -111,7 +111,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import mlflow\n",
"mlflow.langchain.autolog()" "mlflow.langchain.autolog()"
] ]
}, },

View file

@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
""" """
import asyncio import asyncio
import os
# Enter your DATABASE_URL here # Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view # Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e: except Exception:
# If an error occurs, the view does not exist, so create it # If an error occurs, the view does not exist, so create it
await db.execute_raw( await db.execute_raw(
""" """
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa print("Last30dKeysBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa print("Last30dModelsBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1""" """SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
) )
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""") await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa print("DailyTagSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
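The view-creation script above repeats one pattern per view: probe it with a `SELECT`, and create it if the probe raises. A condensed sketch of that pattern, assuming `db` is an already-connected Prisma client and treating the view name and SQL body as placeholders.

```python
async def ensure_view(db, view_name: str, create_sql: str) -> None:
    """Create `view_name` via `create_sql` if it does not already exist."""
    try:
        # Probe the view; this raises if it does not exist yet.
        await db.query_raw(f'SELECT 1 FROM "{view_name}" LIMIT 1')
        print(f"{view_name} Exists!")  # noqa
    except Exception:
        # View is missing (or the probe failed), so (re)create it.
        await db.execute_raw(create_sql)
        print(f"{view_name} created!")  # noqa
```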

View file

@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data # For now, just printing the received data
return {"message": "Request received successfully"} return {"message": "Request received successfully"}
except Exception as e: except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error") raise HTTPException(status_code=500, detail="Internal Server Error")

View file

@ -2,12 +2,10 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import dotenv, os import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional from typing import Optional
import traceback import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import dotenv, os import litellm
import traceback import uuid
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger

View file

@ -11,9 +11,9 @@ import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any from typing import Optional, Literal, Any
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str, convert_litellm_response_object_to_str,
) )
from typing import List from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
import httpx
import json import json
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
""" """
Use this for the post call moderation with Guardrails Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True: if self.should_run_guardrail(data=data, event_type=event_type) is not True:

View file

@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords ## Reject a call / response if it contains certain keywords
from typing import Optional, Literal from typing import Literal
import litellm import litellm
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger): class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call - check if user id part of call
- check if user id part of blocked list - check if user id part of blocked list
""" """
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook") self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data: if call_type == "completion" and "messages" in data:
for m in data["messages"]: for m in data["messages"]:
if "content" in m and isinstance(m["content"], str): if "content" in m and isinstance(m["content"], str):

View file

@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger): class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache - check if end-user in cache
- check if end-user in db - check if end-user in db
""" """
self.print_verbose(f"Inside Blocked User List Pre-Call Hook") self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data: if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", "")) user = data.get("user_id", data.get("user", ""))
if ( if (

View file

@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger): class _ENTERPRISE_GoogleTextModeration(CustomLogger):

View file

@ -7,28 +7,24 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
from collections.abc import Iterable from collections.abc import Iterable
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.types.utils import ( from litellm.types.utils import (
ModelResponse, ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices, Choices,
) )
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True litellm.set_verbose = True

View file

@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy ## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation", "moderation",
"audio_transcription", "audio_transcription",
] ]
except Exception as e: except Exception:
self.print_verbose( self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']" f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
) )

View file

@ -5,27 +5,19 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True litellm.set_verbose = True

View file

@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return

View file

@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints # Enterprise Proxy Util Endpoints
from typing import Optional, List from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list): def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta from datetime import timedelta
if len(data) == 0: if len(data) == 0:
return { return {

View file

@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import ( from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig, AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings, bedrock_tool_name_mappings,
) )
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import ( from .llms.bedrock.common_utils import (
AmazonTitanConfig, AmazonTitanConfig,
AmazonAI21Config, AmazonAI21Config,

View file

@ -1,7 +1,6 @@
import json import json
import logging import logging
import os import os
import traceback
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter

View file

@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os import os
from typing import Dict, List, Optional, Union from typing import List, Optional, Union
import redis # type: ignore import redis # type: ignore
import redis.asyncio as async_redis # type: ignore import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str from litellm import get_secret, get_secret_str
from ._logging import verbose_logger from ._logging import verbose_logger

View file

@ -1,23 +1,12 @@
# What is this? # What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format ## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback import traceback
import uuid from typing import Any, Optional
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
import litellm import litellm
from litellm import ChatCompletionRequest, verbose_logger from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import ( from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse

View file

@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ( from litellm.utils import (
exception_type, exception_type,

View file

@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models. This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds. It sends requests concurrently and returns the response from the first model that responds.
""" """
import concurrent
if "model" in kwargs: if "model" in kwargs:
kwargs.pop("model") kwargs.pop("model")
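
The docstring above describes a first-response-wins fan-out across models. A minimal standalone sketch of that pattern (this is not the function's actual body; it only assumes `litellm.completion` is importable and configured):

from concurrent.futures import ThreadPoolExecutor, as_completed

from litellm import completion


def first_response(models, messages):
    # Fan the same request out to every model in parallel.
    pool = ThreadPoolExecutor(max_workers=len(models))
    try:
        futures = [pool.submit(completion, model=m, messages=messages) for m in models]
        # Return whichever model answers first.
        for future in as_completed(futures):
            return future.result()
    finally:
        # Return immediately instead of waiting on the slower models (Python 3.9+).
        pool.shutdown(wait=False, cancel_futures=True)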

View file

@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx import httpx
import litellm import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import ( from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
VertexAIBatchPrediction, from litellm.secret_managers.main import get_secret_str
) from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout

View file

@ -11,7 +11,7 @@ import json
import os import os
import threading import threading
import time import time
from typing import Literal, Optional, Union from typing import Literal, Optional
import litellm import litellm
from litellm.utils import ModelResponse from litellm.utils import ModelResponse

View file

@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import ast import ast
import asyncio
import hashlib import hashlib
import inspect
import io
import json import json
import logging
import time import time
import traceback import traceback
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import ( from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
from .disk_cache import DiskCache from .disk_cache import DiskCache
from .dual_cache import DualCache from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache from .redis_cache import RedisCache
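
One hunk above keeps `from .dual_cache import DualCache` and adds a `# noqa` marker instead of deleting it. A sketch of why a re-exported name needs that treatment (a hypothetical package layout, not this repo's exact files):

# caching/__init__.py (hypothetical): DualCache is imported only so callers can
# write `from caching import DualCache`; with no local use, an unused-import
# rule such as ruff's F401 would flag it, hence the suppression comment.
from .dual_cache import DualCache  # noqa: F401

# Listing the name in __all__ also makes the re-export explicit.
__all__ = ["DualCache"]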

View file

@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import ( from litellm.caching.caching import S3Cache
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.litellm_core_utils.logging_utils import ( from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks, _assemble_complete_response_from_streaming_chunks,
) )
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns: Returns:
Optional[Any]: Optional[Any]:
""" """
from litellm.utils import ( from litellm.utils import convert_to_model_response_object
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
if ( if (
call_type == CallTypes.acompletion.value call_type == CallTypes.acompletion.value

View file

@ -1,8 +1,6 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:

View file

@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger

View file

@ -15,7 +15,6 @@ from typing import Any
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache

View file

@ -13,7 +13,6 @@ import asyncio
import inspect import inspect
import json import json
import time import time
import traceback
from datetime import timedelta from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceTypes
from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs, **kwargs,
): ):
import redis
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging

View file

@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs, **kwargs,
): ):
from redisvl.index import SearchIndex from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose( print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index" "redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
# query # query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs): async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router from litellm.proxy.proxy_server import llm_model_list, llm_router

View file

@ -12,11 +12,9 @@ Has 4 methods:
import ast import ast
import asyncio import asyncio
import json import json
from typing import Any, Optional from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs) self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
import boto3
import botocore import botocore
try: try:

View file

@ -1,7 +1,6 @@
# What is this? # What is this?
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second, cost_per_second as openai_cost_per_second,
) )
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import ( from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator, cost_calculator as vertex_ai_image_cost_calculator,
) )
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import ( from litellm.utils import (
CallTypes, CallTypes,

View file

@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx import httpx
import litellm import litellm
from litellm import client, get_secret_str from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import ( from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
VertexAIFilesHandler,
)
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
Batch,
CreateFileRequest, CreateFileRequest,
FileContentRequest, FileContentRequest,
FileTypes, FileTypes,

View file

@ -19,10 +19,10 @@ import httpx
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import * from litellm.types.router import *
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout

View file

@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults see custom_batch_logger.py for more details / defaults
""" """
import os from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from litellm._logging import verbose_logger, verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
if TYPE_CHECKING: if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
import json
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:
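
The module docstring above describes a flush-every-10s-or-when-the-batch-is-large policy. A minimal sketch of that batching pattern (illustrative names, not the proxy's implementation):

import asyncio


class AlertBatcher:
    def __init__(self, send, flush_interval: float = 10.0, batch_size: int = 100):
        self.send = send                    # async callable that takes a list of events
        self.flush_interval = flush_interval
        self.batch_size = batch_size
        self.queue: list = []

    async def add(self, event):
        self.queue.append(event)
        if len(self.queue) >= self.batch_size:
            await self.flush()              # size threshold reached, flush early

    async def run(self):
        while True:                         # periodic flush loop ("every 10s")
            await asyncio.sleep(self.flush_interval)
            await self.flush()

    async def flush(self):
        if not self.queue:
            return
        batch, self.queue = self.queue, []  # swap the queue out before sending
        await self.send(batch)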

View file

@ -4,16 +4,10 @@ import asyncio
import datetime import datetime
import os import os
import random import random
import threading
import time import time
import traceback from datetime import timedelta
from datetime import datetime as dt from typing import Any, Dict, List, Literal, Optional, Union
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
import aiohttp
import dotenv
from openai import APIError from openai import APIError
import litellm import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import ( from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert, _add_key_name_and_team_to_alert,
) )
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import ( from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.router import Router from litellm.router import Router
from litellm.types.integrations.slack_alerting import * from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import * from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types: if alert_type not in self.alert_types:
return return
import json
from datetime import datetime from datetime import datetime
# Get the current timestamp # Get the current timestamp

View file

@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio import asyncio
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret from litellm.secret_managers.main import get_secret

View file

@ -6,14 +6,9 @@ import asyncio
import json import json
import os import os
import random import random
import time
import traceback
import types import types
import uuid from typing import Any, Dict, List, Optional
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS, SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject, ArgillaCredentialsObject,
ArgillaItem, ArgillaItem,
ArgillaPayload,
) )
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload

View file

@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
""" """
import json import json
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj): def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import ( from litellm.integrations._types.open_inference import (
MessageAttributes, MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues, OpenInferenceSpanKindValues,
SpanAttributes, SpanAttributes,
) )

View file

@ -3,23 +3,8 @@ import json
import os import os
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from re import S, T from typing import List, Optional
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger

View file

@ -2,15 +2,10 @@
## Log success + failure events to Braintrust ## Log success + failure events to Braintrust
import copy import copy
import json
import os import os
import threading
import traceback
import uuid
from datetime import datetime from datetime import datetime
from typing import Literal, Optional from typing import Optional
import dotenv
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.utils import get_formatted_prompt, print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client( global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback llm_provider=httpxSpecialProvider.LoggingCallback

View file

@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio import asyncio
import time import time
from typing import List, Literal, Optional from typing import List, Optional
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger

View file

@ -1,4 +1,4 @@
from typing import List, Literal, Optional from typing import List, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger

View file

@ -1,18 +1,14 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import os
import traceback import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel from pydantic import BaseModel
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
AdapterCompletionStreamWrapper, AdapterCompletionStreamWrapper,
EmbeddingResponse, EmbeddingResponse,

View file

@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio import asyncio
import datetime import datetime
import os import os
import sys
import traceback import traceback
import uuid import uuid
from datetime import datetime as datetimeObj from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union from typing import Any, List, Optional, Union
from httpx import Response from httpx import Response
@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import * from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload

View file

@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio import asyncio
import os import os
import traceback
import uuid import uuid
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional
from httpx import Response
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger

View file

@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import traceback import traceback
import uuid import uuid
from typing import Any from typing import Any
import dotenv
import litellm import litellm

View file

@ -2,7 +2,6 @@
Functions for sending Email Alerts Functions for sending Email Alerts
""" """
import asyncio
import os import os
from typing import List, Optional from typing import List, Optional
@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
) )
if team_id is None: if team_id is None:
return [] return []
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import prisma_client
if prisma_client is None: if prisma_client is None:
raise Exception("Not connected to DB!") raise Exception("Not connected to DB!")
@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id _team_id = webhook_event.team_id

View file

@ -1,15 +1,12 @@
import os import os
from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )

View file

@ -1,27 +1,14 @@
import asyncio import asyncio
import json
import os import os
import uuid import uuid
from datetime import datetime from datetime import datetime
from re import S from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase

View file

@ -1,13 +1,7 @@
import json import json
import os import os
import uuid from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger. This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it. It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
""" """
from litellm.llms.vertex_ai.vertex_llm_base import ( from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
VertexBase,
)
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials) _in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances: if _in_memory_key not in self.vertex_instances:

View file

@ -3,10 +3,7 @@
import os import os
import traceback import traceback
import dotenv
import litellm import litellm
from litellm._logging import verbose_logger
class HeliconeLogger: class HeliconeLogger:

View file

@ -3,11 +3,9 @@
import json import json
import os import os
import traceback
import uuid import uuid
from typing import Literal, Optional from typing import Literal, Optional
import dotenv
import httpx import httpx
import litellm import litellm

View file

@ -3,7 +3,6 @@
import copy import copy
import os import os
import traceback import traceback
import types
from collections.abc import MutableMapping, MutableSequence, MutableSet from collections.abc import MutableMapping, MutableSequence, MutableSet
from typing import TYPE_CHECKING, Any, Dict, Optional, cast from typing import TYPE_CHECKING, Any, Dict, Optional, cast

View file

@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
Handles Key/Team Based Langfuse Logging Handles Key/Team Based Langfuse Logging
""" """
import os
from typing import TYPE_CHECKING, Any, Dict, Optional from typing import TYPE_CHECKING, Any, Dict, Optional
from packaging.version import Version
from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
from .langfuse import LangFuseLogger, LangfuseLoggingConfig from .langfuse import LangFuseLogger, LangfuseLoggingConfig

View file

@ -3,14 +3,12 @@
import asyncio import asyncio
import os import os
import random import random
import time
import traceback import traceback
import types import types
import uuid import uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -18,7 +16,6 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )

View file

@ -1,9 +1,7 @@
import traceback
import json import json
from litellm.integrations.custom_logger import CustomLogger from typing import TYPE_CHECKING, Any
from litellm.proxy._types import SpanAttributes
from typing import TYPE_CHECKING, Any, Optional, Union from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span

View file

@ -3,17 +3,12 @@
import json import json
import os import os
import traceback
import uuid
import dotenv
import httpx import httpx
import litellm import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,

View file

@ -1,7 +1,6 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm import litellm
@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
ChatCompletionMessageToolCall, ChatCompletionMessageToolCall,
EmbeddingResponse,
Function, Function,
ImageResponse,
ModelResponse,
StandardLoggingPayload, StandardLoggingPayload,
) )
@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None, end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value) span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj): def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
kwargs.get("optional_params", {}) kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {} litellm_params = kwargs.get("litellm_params", {}) or {}
@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode

View file

@ -3,8 +3,6 @@ import os
import time import time
from typing import Dict, Final, List, Optional from typing import Dict, Final, List, Optional
from litellm.types.utils import ModelResponse
CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config" CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"

View file

@ -1,15 +1,10 @@
# used for /metrics endpoint on LiteLLM Proxy # used for /metrics endpoint on LiteLLM Proxy
#### What this does #### #### What this does ####
# On success, log events to Prometheus # On success, log events to Prometheus
import os
import subprocess
import sys import sys
import traceback from datetime import datetime, timedelta
import uuid from typing import Optional
from datetime import date, datetime, timedelta
from typing import Optional, TypedDict, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth

View file

@ -2,13 +2,10 @@
Helper functions to query prometheus API Helper functions to query prometheus API
""" """
import asyncio
import os
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Optional from typing import Optional
import litellm
from litellm import get_secret from litellm import get_secret
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (

View file

@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers) # On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceLoggerPayload, ServiceTypes

View file

@ -1,12 +1,6 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import Optional from typing import Optional
import litellm import litellm

View file

@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import subprocess import subprocess
import sys import sys
import traceback import traceback
import dotenv
import litellm import litellm

View file

@ -1,6 +1,5 @@
import traceback import traceback
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -12,9 +11,7 @@ class TraceloopLogger:
def __init__(self): def __init__(self):
try: try:
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
from traceloop.sdk.tracing.tracing import TracerWrapper from traceloop.sdk.tracing.tracing import TracerWrapper
except ModuleNotFoundError as e: except ModuleNotFoundError as e:
verbose_logger.error( verbose_logger.error(
@ -39,7 +36,6 @@ class TraceloopLogger:
level="DEFAULT", level="DEFAULT",
status_message=None, status_message=None,
): ):
from opentelemetry import trace
from opentelemetry.semconv.ai import SpanAttributes from opentelemetry.semconv.ai import SpanAttributes
from opentelemetry.trace import SpanKind, Status, StatusCode from opentelemetry.trace import SpanKind, Status, StatusCode
@ -78,7 +74,7 @@ class TraceloopLogger:
) )
if "top_p" in optional_params: if "top_p" in optional_params:
span.set_attribute( span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p") SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
) )
if "tools" in optional_params or "functions" in optional_params: if "tools" in optional_params or "functions" in optional_params:
span.set_attribute( span.set_attribute(

View file

@ -173,16 +173,14 @@ except Exception:
#### What this does #### #### What this does ####
# On success, logs events to Langfuse # On success, logs events to Langfuse
import os
import traceback import traceback
from datetime import datetime
class WeightsBiasesLogger: class WeightsBiasesLogger:
# Class variables or attributes # Class variables or attributes
def __init__(self): def __init__(self):
try: try:
import wandb pass
except Exception: except Exception:
raise Exception( raise Exception(
"\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m" "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"

View file

@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
import anyio import anyio
import anyio.to_thread import anyio.to_thread
from anyio import to_thread
from typing_extensions import ParamSpec, TypeVar from typing_extensions import ParamSpec, TypeVar
T_ParamSpec = ParamSpec("T_ParamSpec") T_ParamSpec = ParamSpec("T_ParamSpec")

View file

@ -1,7 +1,6 @@
# What is this? # What is this?
## Helper utilities ## Helper utilities
import os from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import httpx import httpx

View file

@ -1,6 +1,4 @@
import json import json
import os
import threading
import traceback import traceback
from typing import Optional from typing import Optional
@ -14,17 +12,14 @@ from ..exceptions import (
APIError, APIError,
AuthenticationError, AuthenticationError,
BadRequestError, BadRequestError,
BudgetExceededError,
ContentPolicyViolationError, ContentPolicyViolationError,
ContextWindowExceededError, ContextWindowExceededError,
NotFoundError, NotFoundError,
OpenAIError,
PermissionDeniedError, PermissionDeniedError,
RateLimitError, RateLimitError,
ServiceUnavailableError, ServiceUnavailableError,
Timeout, Timeout,
UnprocessableEntityError, UnprocessableEntityError,
UnsupportedParamsError,
) )

Some files were not shown because too many files have changed in this diff.
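
For reference, an unused-import sweep like the one in these hunks can be reproduced locally; a sketch assuming ruff is installed (F401 is its unused-import rule, and --fix applies the removals):

import subprocess

# Check the repo for unused imports and remove them in place.
subprocess.run(["ruff", "check", ".", "--select", "F401", "--fix"], check=True)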