(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
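
A minimal sketch of the kind of check this commit enables, assuming ruff's standard F401 ("unused-import") rule; the exact command and configuration used for this PR are not shown here:

    # report unused imports across the repo (F401 is ruff's unused-import rule)
    ruff check --select F401 .

    # optionally remove them automatically where the fix is safe
    ruff check --select F401 --fix .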
Ishaan Jaff 2024-12-19 12:33:42 -08:00 committed by GitHub
parent 5e344497ce
commit c7f14e936a
347 changed files with 5473 additions and 7207 deletions


@@ -1,6 +1,4 @@
from locust import HttpUser, task, between, events
import json
import time
from locust import HttpUser, task, between
class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
def chat_completion(self):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
# Include any additional headers you may need for authentication, etc.
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,423 +1,422 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "BmX0b5Ueh91v"
},
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "BmX0b5Ueh91v"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "mnveHO5dfcB0"
},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eo88QUdbiDIE"
},
"source": [
"## Completion - Quick start"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"import os, litellm"
],
"metadata": {
"id": "mnveHO5dfcB0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": {
"id": "eo88QUdbiDIE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": {
"id": "dQMkM-diiKdE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
],
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": {
"id": "4xrOPnt-oqwm"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
],
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
],
"metadata": {
"id": "lx8DbMBqoAoN"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
],
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "yB2NDOO4oxrp"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dQMkM-diiKdE"
},
"source": [
"## Completion - Streaming"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVvJDVn4g1i1"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4xrOPnt-oqwm"
},
"source": [
"## Completion - Azure, OpenAI in separate threads"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5b5taJPjvC3"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lx8DbMBqoAoN"
},
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pHYANOlOkoDh"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yB2NDOO4oxrp"
},
"source": [
"## Completion - Azure, OpenAI in the same thread"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long


@@ -1,166 +1,163 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "MbLbs1tbISk-"
},
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
],
"metadata": {
"id": "MbLbs1tbISk-"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KGhNJRUCIh1j"
},
"source": [
"## Import Batch Completion"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "LOtI43snDrSK"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Xhv92NBaIpaw"
},
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": {
"id": "KGhNJRUCIh1j"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
],
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
],
"metadata": {
"id": "Xhv92NBaIpaw"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"metadata": {},
"execution_count": 11
}
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"import os\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -1,204 +1,205 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx, json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
"nbformat": 4,
"nbformat_minor": 0
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx\n",
"import json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because it is too large


@@ -1,159 +1,157 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "eKXncoQbU_2j"
},
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
],
"metadata": {
"id": "eKXncoQbU_2j"
}
},
{
"cell_type": "markdown",
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
],
"metadata": {
"id": "ZciYaLwvuFbu"
}
},
{
"cell_type": "code",
"source": [
"pip install nemoguardrails langchain"
],
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
],
"metadata": {
"id": "vz5n00qyuKjp"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
],
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
],
"metadata": {
"id": "8A1KWKnzuxAS"
}
},
{
"cell_type": "code",
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
],
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"execution_count": null,
"outputs": []
}
]
{
"cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"outputs": [],
"source": [
"pip install nemoguardrails langchain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -1,16 +1,12 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# litellm completion call
question = "which country has the highest population"


@@ -1,11 +1,12 @@
import traceback
from flask import Flask, request, jsonify, abort, Response
from flask import Flask, request, Response
from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
import os
import dotenv
import time
import json
dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/
os.environ["PROMPTLAYER_API_KEY"] = (
"" # set your promptlayer key here - https://promptlayer.com/
)
# set callbacks
litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data["model"] = (
"togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
)
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [
@@ -75,7 +76,7 @@ def api_completion():
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
except Exception:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE


@@ -1,5 +1,4 @@
import requests
from urllib.parse import urlparse, parse_qs
def get_next_url(response):


@@ -1,238 +1,237 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{
"cell_type": "markdown",
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
View file
@ -1,201 +1,195 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MOhRaVnhB-0J"
},
"outputs": [],
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
View file
@ -43,7 +43,7 @@
"source": [
"# set you Vertex AI configs\n",
"import litellm\n",
"from litellm import embedding, completion\n",
"from litellm import completion\n",
"\n",
"litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\""
View file
@ -1,331 +1,331 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
],
"metadata": {
"id": "vnvlwUDZK7VA"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KrINCwRfLgZV"
},
"outputs": [],
"source": [
"## Install liteLLM\n",
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{
"cell_type": "code",
"source": [
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": {
"id": "KrINCwRfLgZV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os, litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "nK7zR5OgLlh2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": {
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
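The function-calling notebook above uses the legacy `functions` / `function_call` parameters. LiteLLM also mirrors OpenAI's newer `tools` interface, so the same first step can be written roughly as below (a sketch, not part of this diff; the model name is illustrative and assumed to support tool calling):

```python
import json

from litellm import completion

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
response = completion(model="gpt-4o-mini", messages=messages, tools=tools, tool_choice="auto")

# the model responds with a structured tool call instead of plain text
tool_call = response.choices[0].message.tool_calls[0]
print(tool_call.function.name, json.loads(tool_call.function.arguments))
```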
View file
@ -1,13 +1,13 @@
import openai
api_base = f"http://0.0.0.0:8000"
api_base = "http://0.0.0.0:8000"
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming")
print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages=[
File diff suppressed because one or more lines are too long
View file
@ -1,52 +1,51 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j6yJsCGeaq8G"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
"cells": [
{
"cell_type": "code",
"source": [
"!pip install litellm"
],
"metadata": {
"id": "j6yJsCGeaq8G"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception as e:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
]
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
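Note that the rewritten fallback cell above still calls `traceback.format_exc()` but, after the import cleanup, no cell shown here imports `traceback`. A self-contained version of the same loop needs that import (a minimal sketch, not part of this diff):

```python
import traceback

from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # optionally stop at the first model that answers
    except Exception:
        # traceback must be imported for this call to resolve
        print(f"error occurred: {traceback.format_exc()}")
```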
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -137,7 +135,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -160,7 +158,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -132,7 +130,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
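The three load-test hunks above apply the same fix: an f-string with no placeholders becomes a plain string (ruff rule F541), while f-strings that actually interpolate values are left untouched. A minimal illustration of the distinction (the numbers are made up, not taken from the test scripts):

```python
concurrent_calls, successful_calls, failed_calls = 100, 97, 3  # illustrative values

print("Load test Summary:")                     # no placeholders -> plain string (F541)
print(f"Total Requests: {concurrent_calls}")    # interpolates a value -> f-string is justified
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
```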
View file
@ -1,14 +1,9 @@
from fastapi import FastAPI
import uvicorn
from memory_profiler import profile, memory_usage
from memory_profiler import profile
import os
import traceback
import asyncio
import pytest
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,14 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
from litellm import Router, Timeout
from litellm import Timeout
import time
from litellm.caching.caching import Cache
import litellm
import openai
### Test just calling AsyncAzureOpenAI
View file
@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
View file
@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
View file
@ -1,5 +1,4 @@
import requests
import json
def get_initial_config():
View file
@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model)
print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue
for param, value in litellm_params.items():
View file
@ -1,7 +1,6 @@
import os
from openai import OpenAI
from dotenv import load_dotenv
import httpx
import concurrent.futures
load_dotenv()
View file
@ -2,21 +2,16 @@
import json
import boto3
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
import io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True,
}
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream(
View file
@ -111,7 +111,6 @@
},
"outputs": [],
"source": [
"import mlflow\n",
"mlflow.langchain.autolog()"
]
},
View file
@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
"""
import asyncio
import os
# Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
except Exception:
# If an error occurs, the view does not exist, so create it
await db.execute_raw(
"""
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
)
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
View file
@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data
return {"message": "Request received successfully"}
except Exception as e:
except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error")
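Several hunks in this commit, including the one above, drop an `as e` binding that is never read; the variable is kept only where the handler actually uses the exception. A small sketch of the two idioms (the parser functions here are hypothetical, not from the repo):

```python
import json

from fastapi import HTTPException


def parse_event(raw: str) -> dict:
    try:
        return json.loads(raw)
    except Exception:  # the exception object is unused, so no "as e" binding
        raise HTTPException(status_code=400, detail="Internal Server Error")


def parse_event_verbose(raw: str) -> dict:
    try:
        return json.loads(raw)
    except Exception as e:  # keep "as e" only when the exception is referenced
        raise HTTPException(status_code=400, detail=f"Invalid payload: {e}")
```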
View file
@ -2,12 +2,10 @@
#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional
from typing import Optional
import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does ####
# On success + failure, log events to Supabase
import dotenv, os
import traceback
import datetime, subprocess, sys
import litellm, uuid
import litellm
import uuid
from litellm._logging import print_verbose, verbose_logger
View file
@ -11,9 +11,9 @@ import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal, Any
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str,
)
from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
import httpx
import json
from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks
"""
Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
View file
@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords
from typing import Optional, Literal
from typing import Literal
import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call
- check if user id part of blocked list
"""
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook")
self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data:
for m in data["messages"]:
if "content" in m and isinstance(m["content"], str):
View file
@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache
- check if end-user in db
"""
self.print_verbose(f"Inside Blocked User List Pre-Call Hook")
self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", ""))
if (
View file
@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger):
View file
@ -7,28 +7,24 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os
import sys
import os
from collections.abc import Iterable
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.types.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices,
)
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True
View file
@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union
from typing import Optional, Literal
import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation",
"audio_transcription",
]
except Exception as e:
except Exception:
self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
)
View file
@ -5,27 +5,19 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os
import sys
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True
View file
@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False:
return
View file
@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints
from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta
from datetime import timedelta
if len(data) == 0:
return {
View file
@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings,
)
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import (
AmazonTitanConfig,
AmazonAI21Config,
View file
@ -1,7 +1,6 @@
import json
import logging
import os
import traceback
from datetime import datetime
from logging import Formatter
View file
@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os
from typing import Dict, List, Optional, Union
from typing import List, Optional, Union
import redis # type: ignore
import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str
from ._logging import verbose_logger
View file
@ -1,23 +1,12 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback
import uuid
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
from typing import Any, Optional
import litellm
from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import (
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse
View file
@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import (
exception_type,
View file
@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds.
"""
import concurrent
if "model" in kwargs:
kwargs.pop("model")
View file
@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx
import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import (
VertexAIBatchPrediction,
)
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout
View file
@ -11,7 +11,7 @@ import json
import os
import threading
import time
from typing import Literal, Optional, Union
from typing import Literal, Optional
import litellm
from litellm.utils import ModelResponse
View file
@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
import ast
import asyncio
import hashlib
import inspect
import io
import json
import logging
import time
import traceback
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache
from .disk_cache import DiskCache
from .dual_cache import DualCache
from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache
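The `# noqa` comment added in this hunk marks a deliberate re-export so the new unused-import rule (ruff F401) does not flag it. A minimal sketch of the pattern the rule enforces (module and names below are illustrative, not from the repo):

```python
import json  # referenced below, so F401 is satisfied

# re-exported on purpose for downstream callers; suppress only the unused-import rule
from collections import OrderedDict as OrderedDict  # noqa: F401


def dump(payload: dict) -> str:
    # json is genuinely used here, so ruff leaves the import in place
    return json.dumps(payload, sort_keys=True)
```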
View file
@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import (
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.caching.caching import S3Cache
from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks,
)
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns:
Optional[Any]:
"""
from litellm.utils import (
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
from litellm.utils import convert_to_model_response_object
if (
call_type == CallTypes.acompletion.value
View file
@ -1,8 +1,6 @@
import json
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache
if TYPE_CHECKING:
View file
@ -12,7 +12,7 @@ import asyncio
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
View file
@ -15,7 +15,6 @@ from typing import Any
import litellm
from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache
View file
@ -13,7 +13,6 @@ import asyncio
import inspect
import json
import time
import traceback
from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import all_litellm_params
from litellm.types.services import ServiceTypes
from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs,
):
import redis
from litellm._service_logger import ServiceLogging
View file
@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs,
):
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery
# query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router
View file
@ -12,11 +12,9 @@ Has 4 methods:
import ast
import asyncio
import json
from typing import Any, Optional
from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs):
import boto3
import botocore
try:
View file
@ -1,7 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second,
)
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator,
)
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import (
CallTypes,
View file
@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx
import litellm
from litellm import client, get_secret_str
from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import (
VertexAIFilesHandler,
)
from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
from litellm.types.llms.openai import (
Batch,
CreateFileRequest,
FileContentRequest,
FileTypes,
View file
@ -19,10 +19,10 @@ import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate
from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters
from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import *
from litellm.utils import supports_httpx_timeout
View file
@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults
"""
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from typing import TYPE_CHECKING, Any
from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
from litellm._logging import verbose_proxy_logger
if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue):
import json
squashed = {}
if len(queue) == 0:
View file
@ -4,16 +4,10 @@ import asyncio
import datetime
import os
import random
import threading
import time
import traceback
from datetime import datetime as dt
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
from datetime import timedelta
from typing import Any, Dict, List, Literal, Optional, Union
import aiohttp
import dotenv
from openai import APIError
import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert,
)
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import (
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
from litellm.router import Router
from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email
email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types:
return
import json
from datetime import datetime
# Get the current timestamp
View file
@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio
from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret


@@ -6,14 +6,9 @@ import asyncio
import json
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore
@@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
@@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject,
ArgillaItem,
ArgillaPayload,
)
from litellm.types.utils import StandardLoggingPayload


@@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
"""
import json
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger
@@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import (
MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues,
SpanAttributes,
)


@@ -3,23 +3,8 @@ import json
import os
import uuid
from datetime import datetime, timedelta
from re import S, T
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
from typing import List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger


@@ -2,15 +2,10 @@
## Log success + failure events to Braintrust
import copy
import json
import os
import threading
import traceback
import uuid
from datetime import datetime
from typing import Literal, Optional
from typing import Optional
import dotenv
import httpx
from pydantic import BaseModel
@@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.utils import get_formatted_prompt, print_verbose
from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback


@@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio
import time
from typing import List, Literal, Optional
from typing import List, Optional
import litellm
from litellm._logging import verbose_logger

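The file above implements the pattern its docstring describes: logs are buffered in memory and flushed periodically, or earlier once the batch grows large. A rough, self-contained sketch of that pattern, with placeholder defaults and a print in place of the real sink:

```python
import asyncio
from typing import List


class InMemoryBatchLoggerSketch:
    """Buffer events in memory; flush on a timer or when the batch is full."""

    def __init__(self, flush_interval_s: float = 10.0, batch_size: int = 100):
        self.log_queue: List[dict] = []
        self.flush_interval_s = flush_interval_s
        self.batch_size = batch_size

    async def log_event(self, payload: dict) -> None:
        self.log_queue.append(payload)
        if len(self.log_queue) >= self.batch_size:
            await self.flush()

    async def periodic_flush(self) -> None:
        # Run as a background task: flush whatever has accumulated every interval.
        while True:
            await asyncio.sleep(self.flush_interval_s)
            await self.flush()

    async def flush(self) -> None:
        if not self.log_queue:
            return
        batch, self.log_queue = self.log_queue, []
        # Placeholder: ship `batch` to the real sink (Slack, Datadog, etc.).
        print(f"flushing {len(batch)} events")
```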

@@ -1,4 +1,4 @@
from typing import List, Literal, Optional
from typing import List, Optional
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger


@@ -1,18 +1,14 @@
#### What this does ####
# On success, logs events to Promptlayer
import os
import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
AdapterCompletionStreamWrapper,
EmbeddingResponse,


@@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio
import datetime
import os
import sys
import traceback
import uuid
from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union
from typing import Any, List, Optional, Union
from httpx import Response
@@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload


@@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio
import os
import traceback
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from httpx import Response
from typing import Any, Dict, List, Optional
import litellm
from litellm._logging import verbose_logger


@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import traceback
import uuid
from typing import Any
import dotenv
import litellm


@@ -2,7 +2,6 @@
Functions for sending Email Alerts
"""
import asyncio
import os
from typing import List, Optional
@@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
)
if team_id is None:
return []
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import prisma_client
if prisma_client is None:
raise Exception("Not connected to DB!")
@@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id

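The docstring above describes sending an email alert to every team member when the team budget is crossed, returning True if anything was sent. A hedged sketch of that flow, where `fetch_team_member_emails` and `send_email` are hypothetical stand-ins for the proxy's DB lookup and email utility:

```python
from typing import List


def fetch_team_member_emails(team_id: str) -> List[str]:
    # Placeholder for the DB lookup done by get_all_team_member_emails().
    return ["admin@example.com", "dev@example.com"]


def send_email(to: str, subject: str, body: str) -> None:
    # Placeholder for the proxy's email utility.
    print(f"sending '{subject}' to {to}")


def send_team_budget_alert(team_id: str, spend: float, max_budget: float) -> bool:
    """Returns True if at least one email was sent, False otherwise."""
    emails = fetch_team_member_emails(team_id)
    if not emails:
        return False
    for email in emails:
        send_email(
            to=email,
            subject="LiteLLM: team budget crossed",
            body=f"Team {team_id} has spent {spend} of its {max_budget} budget.",
        )
    return True
```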

@@ -1,15 +1,12 @@
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client,
httpxSpecialProvider,
)


@@ -1,27 +1,14 @@
import asyncio
import json
import os
import uuid
from datetime import datetime
from re import S
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.proxy._types import CommonProxyErrors
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardLoggingPayload
if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase


@@ -1,13 +1,7 @@
import json
import os
import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
@@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
"""
from litellm.llms.vertex_ai.vertex_llm_base import (
VertexBase,
)
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances:

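The hunk above keys `self.vertex_instances` by a value derived from the credentials and reuses the cached Vertex instance on later calls. An illustrative version of that cache-by-credentials pattern, with a made-up `VertexClient` standing in for the real handler:

```python
from typing import Dict


class VertexClient:
    """Hypothetical stand-in for the real Vertex handler."""

    def __init__(self, credentials: str):
        self.credentials = credentials


class VertexInstanceCache:
    def __init__(self) -> None:
        self.vertex_instances: Dict[str, VertexClient] = {}

    def get_vertex_instance(self, credentials: str) -> VertexClient:
        # Derive an in-memory key from the credentials; create and cache on miss.
        key = credentials or "default"
        if key not in self.vertex_instances:
            self.vertex_instances[key] = VertexClient(credentials)
        return self.vertex_instances[key]
```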

@@ -3,10 +3,7 @@
import os
import traceback
import dotenv
import litellm
from litellm._logging import verbose_logger
class HeliconeLogger:


@@ -3,11 +3,9 @@
import json
import os
import traceback
import uuid
from typing import Literal, Optional
import dotenv
import httpx
import litellm


@@ -3,7 +3,6 @@
import copy
import os
import traceback
import types
from collections.abc import MutableMapping, MutableSequence, MutableSet
from typing import TYPE_CHECKING, Any, Dict, Optional, cast


@@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
Handles Key/Team Based Langfuse Logging
"""
import os
from typing import TYPE_CHECKING, Any, Dict, Optional
from packaging.version import Version
from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
from .langfuse import LangFuseLogger, LangfuseLoggingConfig


@@ -3,14 +3,12 @@
import asyncio
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore
@@ -18,7 +16,6 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)


@@ -1,9 +1,7 @@
import traceback
import json
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import SpanAttributes
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Optional, Union
from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span


@@ -3,17 +3,12 @@
import json
import os
import traceback
import uuid
import dotenv
import httpx
import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,


@@ -1,7 +1,6 @@
import os
from dataclasses import dataclass
from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm
@@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
ChatCompletionMessageToolCall,
EmbeddingResponse,
Function,
ImageResponse,
ModelResponse,
StandardLoggingPayload,
)
@@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode


@@ -3,8 +3,6 @@ import os
import time
from typing import Dict, Final, List, Optional
from litellm.types.utils import ModelResponse
CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"


@@ -1,15 +1,10 @@
# used for /metrics endpoint on LiteLLM Proxy
#### What this does ####
# On success, log events to Prometheus
import os
import subprocess
import sys
import traceback
import uuid
from datetime import date, datetime, timedelta
from typing import Optional, TypedDict, Union
from datetime import datetime, timedelta
from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


@@ -2,13 +2,10 @@
Helper functions to query prometheus API
"""
import asyncio
import os
import time
from datetime import datetime, timedelta
from typing import Optional
import litellm
from litellm import get_secret
from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import (

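The helpers above query the Prometheus API; at its core that is an HTTP GET against Prometheus's `/api/v1/query` endpoint. A small standalone example (the base URL and the metric name are placeholders; the real helpers reuse LiteLLM's async httpx client rather than creating one inline):

```python
import asyncio

import httpx


async def query_prometheus(base_url: str, promql: str) -> dict:
    # GET {base_url}/api/v1/query?query=<promql> and return the decoded JSON body.
    async with httpx.AsyncClient() as client:
        resp = await client.get(f"{base_url}/api/v1/query", params={"query": promql})
        resp.raise_for_status()
        return resp.json()


if __name__ == "__main__":
    # Example PromQL; the metric name here is illustrative only.
    result = asyncio.run(
        query_prometheus("http://localhost:9090", "sum(litellm_requests_metric)")
    )
    print(result["status"])
```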

@@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes


@@ -1,12 +1,6 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import Optional
import litellm


@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import dotenv
import litellm


@@ -1,6 +1,5 @@
import traceback
import litellm
from litellm._logging import verbose_logger
@@ -12,9 +11,7 @@ class TraceloopLogger:
def __init__(self):
try:
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
from traceloop.sdk.tracing.tracing import TracerWrapper
except ModuleNotFoundError as e:
verbose_logger.error(
@@ -39,7 +36,6 @@ class TraceloopLogger:
level="DEFAULT",
status_message=None,
):
from opentelemetry import trace
from opentelemetry.semconv.ai import SpanAttributes
from opentelemetry.trace import SpanKind, Status, StatusCode
@@ -78,7 +74,7 @@ class TraceloopLogger:
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(


@@ -173,16 +173,14 @@ except Exception:
#### What this does ####
# On success, logs events to Langfuse
import os
import traceback
from datetime import datetime
class WeightsBiasesLogger:
# Class variables or attributes
def __init__(self):
try:
import wandb
pass
except Exception:
raise Exception(
"\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"


@@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
import anyio
import anyio.to_thread
from anyio import to_thread
from typing_extensions import ParamSpec, TypeVar
T_ParamSpec = ParamSpec("T_ParamSpec")


@@ -1,7 +1,6 @@
# What is this?
## Helper utilities
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Optional, Union
import httpx


@@ -1,6 +1,4 @@
import json
import os
import threading
import traceback
from typing import Optional
@@ -14,17 +12,14 @@ from ..exceptions import (
APIError,
AuthenticationError,
BadRequestError,
BudgetExceededError,
ContentPolicyViolationError,
ContextWindowExceededError,
NotFoundError,
OpenAIError,
PermissionDeniedError,
RateLimitError,
ServiceUnavailableError,
Timeout,
UnprocessableEntityError,
UnsupportedParamsError,
)

Some files were not shown because too many files have changed in this diff.