Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)

(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports
* fix AmazonConverseConfig
* fix test
* fix import
* ruff check fixes
* test fixes
* fix testing
* fix imports

This commit is contained in:
parent 5e344497ce
commit c7f14e936a

347 changed files with 5473 additions and 7207 deletions
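For context on the rule itself: unused imports are pyflakes rule F401 in ruff. The exact ruff configuration used by this commit is not shown on this page, so the command and selector below are assumptions, a minimal sketch rather than the project's actual setup:

    # before.py - what F401 flags
    import json   # F401: `json` imported but unused
    import time   # F401: `time` imported but unused
    from locust import HttpUser, task, between

    class MyUser(HttpUser):
        @task
        def chat_completion(self):
            ...

Running `ruff check --select F401 --fix .` rewrites the file with the two unused imports removed, which is the shape of most hunks in this diff.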
.github/workflows/locustfile.py (vendored): 6 lines changed
@@ -1,6 +1,4 @@
-from locust import HttpUser, task, between, events
-import json
-import time
+from locust import HttpUser, task, between


 class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
             # Include any additional headers you may need for authentication, etc.
         }

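The Authorization change in the second hunk is ruff's F541 rule (f-string without any placeholders): the `f` prefix does nothing here, so the literal becomes a plain string. A minimal sketch of the same fix, assuming F541 is among the selected rules:

    # before: F541, the f-string has no placeholders
    headers = {"Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA"}
    # after: plain string literal, which `ruff check --fix` produces
    headers = {"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA"}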
cookbook/Benchmarking_LLMs_by_use_case.ipynb (vendored): 1470 lines changed
File diff suppressed because one or more lines are too long
cookbook/Evaluating_LLMs.ipynb (vendored): 1154 lines changed
File diff suppressed because one or more lines are too long
cookbook/LiteLLM_Azure_and_OpenAI_example.ipynb (vendored): 833 lines changed
@@ -1,423 +1,422 @@
The notebook was re-serialized: "cells" now precedes "metadata"/"nbformat" at the top level, and each cell's keys follow the canonical nbformat order, so this hunk repeats every cell of the notebook once as removed and once as added. The substantive change is the unused-import cleanup in the setup cell (litellm is only ever used via `from litellm import completion`):

-    "import os, litellm"
+    "import os"

The notebook's content is otherwise intact. It covers, for Azure OpenAI + OpenAI: Completion quick start, streaming, Azure and OpenAI in separate threads, a stress test of 10 requests in parallel, and Azure + OpenAI in the same thread, each with its recorded stdout output (chat.completion responses from gpt-3.5-turbo-0613 and gpt-35-turbo). The quick-start cell, as it appears in the capture:

    import os
    from litellm import completion

    # openai configs
    os.environ["OPENAI_API_KEY"] = ""

    # azure openai configs
    os.environ["AZURE_API_KEY"] = ""
    os.environ["AZURE_API_BASE"] = "https://openai-gpt-4-test-v-1.openai.azure.com/"
    os.environ["AZURE_API_VERSION"] = "2023-05-15"

    # openai call
    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"content": "Hello, how are you?", "role": "user"}]
    )
    print("Openai Response\n")
    print(response)

    # azure call
    response = completion(
        model="azure/your-azure-deployment",
        messages=[{"content": "Hello, how are you?", "role": "user"}]
    )
    print("Azure Response\n")
    print(response)
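The separate-threads and stress-test cells summarized above share one pattern; condensed here from the notebook's two threading cells (model names and blank keys are the notebook's own placeholders):

    import os
    import threading
    from litellm import completion

    os.environ["OPENAI_API_KEY"] = ""
    os.environ["AZURE_API_KEY"] = ""
    os.environ["AZURE_API_BASE"] = "https://openai-gpt-4-test-v-1.openai.azure.com/"
    os.environ["AZURE_API_VERSION"] = "2023-05-15"

    # Function to make a completion call
    def make_completion(model, messages):
        response = completion(model=model, messages=messages)
        print(f"Response for {model}: {response}")

    messages = [{"content": "Hello, how are you?", "role": "user"}]

    # Create and start 10 threads, alternating OpenAI and Azure (the stress test)
    threads = []
    for i in range(10):
        model = "gpt-3.5-turbo" if i % 2 == 0 else "azure/your-azure-deployment"
        thread = threading.Thread(target=make_completion, args=(model, messages))
        threads.append(thread)
        thread.start()

    # Wait for all threads to finish
    for thread in threads:
        thread.join()

    print("All completions are done.")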
cookbook/LiteLLM_Comparing_LLMs.ipynb (vendored): 867 lines changed
File diff suppressed because one or more lines are too long
cookbook/LiteLLM_batch_completion.ipynb (vendored): 315 lines changed
@@ -1,166 +1,163 @@
Re-serialized the same way ("cells" before "metadata"/"nbformat", canonical key order inside each cell), so this hunk repeats each cell once as removed and once as added. The substantive changes trim imports:

In the Import Batch Completion cell (`litellm` is never referenced by name):
-    "import litellm\n",
     "import os\n",
     "from litellm import batch_completion\n",

In the calling cell (redundant with the imports in the cell above):
-    "import litellm\n",
-    "import os\n",
-    "from litellm import batch_completion\n",
+    "import os\n",

The tutorial itself is unchanged. It walks through `batch_completion` (docs: https://docs.litellm.ai/docs/completion/batching): you provide a list of message lists, and each sub-list is passed to `litellm.completion()`, letting you process multiple prompts efficiently in a single call. The calling cell after the change:

    import os

    os.environ['ANTHROPIC_API_KEY'] = ""

    responses = batch_completion(
        model="claude-2",
        messages=[
            [{"role": "user", "content": "good morning? "}],
            [{"role": "user", "content": "what's the time? "}],
        ],
    )
    responses

The recorded output, unchanged by this commit, is two claude-2 ModelResponse objects (" Good morning!" and a reply that the model has no concept of the current time).
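Each element of `responses` is a litellm `ModelResponse` in the OpenAI chat-completion shape, as the recorded output shows. A minimal sketch of reading the text back out, assuming the dict-style access litellm responses support:

    for r in responses:
        # each response carries choices -> message -> content, OpenAI-style
        print(r["choices"][0]["message"]["content"])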
cookbook/Proxy_Batch_Users.ipynb (vendored): 403 lines changed
@@ -1,204 +1,205 @@
Re-serialization again repeats every cell once as removed and once as added. The substantive change splits a combined import onto separate lines in the Environment Setup cell:

     "import csv\n",
     "from typing import Optional\n",
-    "import httpx, json\n",
+    "import httpx\n",
+    "import json\n",
     "import asyncio\n",

The rest of the notebook is unchanged: a global `HTTPHandler` class wrapping a pooled `httpx.AsyncClient` for faster HTTP calls; an Import Sheet section (format: | ID | Name | Max Budget |) whose `import_sheet()` coroutine reads `my-batch-sheet.csv`, creates a user per row, and writes `my-batch-sheet_new.csv` with a `keys` column added; and a Create Users + Keys section where `create_user()` calls the proxy's `/user/new` (with `auto_create_key: False`) and `create_key_with_alias()` calls `/key/generate`, setting `max_budget` per key and authenticating with the master key. The setup cell after the change:

    import csv
    from typing import Optional
    import httpx
    import json
    import asyncio

    proxy_base_url = "http://0.0.0.0:4000"  # 👈 SET TO PROXY URL
    master_key = "sk-1234"  # 👈 SET TO PROXY MASTER KEY
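The `import httpx, json` split matches pycodestyle's E401 rule (multiple imports on one line), which ruff also implements; whether E401 was selected for this run or the split was made by hand is not visible here. A minimal sketch, assuming the rule is enabled:

    # before: E401, multiple imports on one line
    import httpx, json

    # after: one module per import statement
    import httpx
    import json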
cookbook/TogetherAI_liteLLM.ipynb (vendored): 2007 lines changed
File diff suppressed because it is too large
@ -1,159 +1,157 @@
|
|||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "eKXncoQbU_2j"
|
||||
},
|
||||
"source": [
|
||||
"# Using Nemo-Guardrails with LiteLLM Server\n",
|
||||
"\n",
|
||||
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
|
||||
]
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Using Nemo-Guardrails with LiteLLM Server\n",
|
||||
"\n",
|
||||
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "eKXncoQbU_2j"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Using with Bedrock\n",
|
||||
"\n",
|
||||
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "ZciYaLwvuFbu"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"pip install nemoguardrails langchain"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "vOUwGSJ2Vsy3"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "xXEJNxe7U0IN"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
|
||||
"\n",
|
||||
"from nemoguardrails import LLMRails, RailsConfig\n",
|
||||
"\n",
|
||||
"config = RailsConfig.from_path(\"./config.yml\")\n",
|
||||
"app = LLMRails(config, llm=llm)\n",
|
||||
"\n",
|
||||
"new_message = app.generate(messages=[{\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"Hello! What can you do for me?\"\n",
|
||||
"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Using with TogetherAI\n",
|
||||
"\n",
|
||||
"1. You can either set this in the server environment:\n",
|
||||
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
|
||||
"\n",
|
||||
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "vz5n00qyuKjp"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
|
||||
"\n",
|
||||
"from nemoguardrails import LLMRails, RailsConfig\n",
|
||||
"\n",
|
||||
"config = RailsConfig.from_path(\"./config.yml\")\n",
|
||||
"app = LLMRails(config, llm=llm)\n",
|
||||
"\n",
|
||||
"new_message = app.generate(messages=[{\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"Hello! What can you do for me?\"\n",
|
||||
"}])"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "XK1sk-McuhpE"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### CONFIG.YML\n",
|
||||
"\n",
|
||||
"save this example `config.yml` in your current directory"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "8A1KWKnzuxAS"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# instructions:\n",
|
||||
"# - type: general\n",
|
||||
"# content: |\n",
|
||||
"# Below is a conversation between a bot and a user about the recent job reports.\n",
|
||||
"# The bot is factual and concise. If the bot does not know the answer to a\n",
|
||||
"# question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"# sample_conversation: |\n",
|
||||
"# user \"Hello there!\"\n",
|
||||
"# express greeting\n",
|
||||
"# bot express greeting\n",
|
||||
"# \"Hello! How can I assist you today?\"\n",
|
||||
"# user \"What can you do for me?\"\n",
|
||||
"# ask about capabilities\n",
|
||||
"# bot respond about capabilities\n",
|
||||
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
|
||||
"# user \"What's 2+2?\"\n",
|
||||
"# ask math question\n",
|
||||
"# bot responds to math question\n",
|
||||
"# \"2+2 is equal to 4.\"\n",
|
||||
"\n",
|
||||
"# models:\n",
|
||||
"# - type: main\n",
|
||||
"# engine: openai\n",
|
||||
"# model: claude-instant-1"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "NKN1GmSvu0Cx"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
{
"cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"outputs": [],
"source": [
"pip install nemoguardrails langchain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
"    \"role\": \"user\",\n",
"    \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
"    \"role\": \"user\",\n",
"    \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [
"# instructions:\n",
"#   - type: general\n",
"#     content: |\n",
"#       Below is a conversation between a bot and a user about the recent job reports.\n",
"#       The bot is factual and concise. If the bot does not know the answer to a\n",
"#       question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"#   user \"Hello there!\"\n",
"#     express greeting\n",
"#   bot express greeting\n",
"#     \"Hello! How can I assist you today?\"\n",
"#   user \"What can you do for me?\"\n",
"#     ask about capabilities\n",
"#   bot respond about capabilities\n",
"#     \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"#   user \"What's 2+2?\"\n",
"#     ask math question\n",
"#   bot responds to math question\n",
"#     \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"#   - type: main\n",
"#     engine: openai\n",
"#     model: claude-instant-1"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
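The Bedrock and TogetherAI sections above boil down to the same pattern: point any OpenAI-compatible client at the LiteLLM proxy container. A minimal sketch, assuming the proxy from the `docker run` commands above is listening on `http://0.0.0.0:8000` (the model name and key below are placeholders, not values from this commit):

```python
import openai  # pre-1.0 openai SDK, as used elsewhere in this repo

# Assumes the LiteLLM proxy container from the docker run commands above
# is listening on port 8000; the proxy maps the key to the real provider.
openai.api_base = "http://0.0.0.0:8000"
openai.api_key = "my-fake-key"

response = openai.ChatCompletion.create(
    model="anthropic.claude-v2",  # or a together_ai/... model, per the sections above
    messages=[{"role": "user", "content": "Hello! What can you do for me?"}],
)
print(response["choices"][0]["message"]["content"])
```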
@ -1,16 +1,12 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()

import litellm
from litellm import embedding, completion, completion_cost

from autoevals.llm import *

###################
import litellm

# litellm completion call
question = "which country has the highest population"
@ -1,11 +1,12 @@
import traceback
from flask import Flask, request, jsonify, abort, Response
from flask import Flask, request, Response
from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
import os
import dotenv
import time
import json

dotenv.load_dotenv()

@ -20,9 +21,9 @@ verbose = True

# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ[
"PROMPTLAYER_API_KEY"
] = ""  # set your promptlayer key here - https://promptlayer.com/
os.environ["PROMPTLAYER_API_KEY"] = (
""  # set your promptlayer key here - https://promptlayer.com/
)

# set callbacks
litellm.success_callback = ["promptlayer"]

@ -57,9 +58,9 @@ def api_completion():
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data["model"] = (
"togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
)
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [

@ -75,7 +76,7 @@ def api_completion():
"stream" in data and data["stream"] == True
):  # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
except Exception:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE
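`data_generator` is not shown in this hunk; a minimal sketch of what such a server-sent-events helper for a streaming `completion()` response typically looks like (the body below is an assumption for illustration, not the file's actual implementation):

```python
import json

def data_generator(response):
    # Yield each streamed chunk as a server-sent event, matching the
    # text/event-stream mimetype used in the hunk above. (Hypothetical
    # sketch; the real helper lives elsewhere in this file.)
    for chunk in response:
        yield f"data: {json.dumps(chunk, default=str)}\n\n"
```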
@ -1,5 +1,4 @@
import requests
from urllib.parse import urlparse, parse_qs


def get_next_url(response):
459 cookbook/liteLLM_Baseten.ipynb (vendored)
@ -1,238 +1,237 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{
"cell_type": "markdown",
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
"   'index': 0,\n",
"   'message': {'role': 'assistant',\n",
"    'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
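Every Baseten call in the notebook above differs only in the version ID, and every response is OpenAI-shaped; a short sketch of unpacking them in one loop (reusing the notebook's `messages` and the version IDs shown in the diff):

```python
from litellm import completion

# Version IDs from the notebook above; any Baseten version ID works the same way.
for model in ["qvv0xeq", "q841o8w", "31dxrj3"]:
    response = completion(model=model, messages=messages, custom_llm_provider="baseten")
    # OpenAI-shaped response: the text is always in the same place.
    print(model, "->", response["choices"][0]["message"]["content"])
```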
382 cookbook/liteLLM_Langchain_Demo.ipynb (vendored)
@ -1,201 +1,195 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MOhRaVnhB-0J"
},
"outputs": [],
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
"    ChatPromptTemplate,\n",
"    SystemMessagePromptTemplate,\n",
"    AIMessagePromptTemplate,\n",
"    HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you\"\n",
"    )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you\"\n",
"    )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you?\"\n",
"    )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you?\"\n",
"    )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you\"\n",
"    )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you\"\n",
"    )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you?\"\n",
"    )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
"    HumanMessage(\n",
"        content=\"what model are you?\"\n",
"    )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
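Since every provider in the notebook above goes through the same `ChatLiteLLM` interface, its four provider cells collapse into one loop; a sketch, assuming the corresponding API keys are already set in the environment:

```python
from langchain.chat_models import ChatLiteLLM
from langchain.schema import HumanMessage

# Assumes OPENAI_API_KEY, ANTHROPIC_API_KEY, COHERE_API_KEY are exported.
models = ["gpt-3.5-turbo", "claude-2", "command-nightly"]
messages = [HumanMessage(content="what model are you")]

for model in models:
    chat = ChatLiteLLM(model=model)
    # Same I/O interface for every provider, as the notebook's intro notes.
    print(model, "->", chat(messages).content)
```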
2 cookbook/liteLLM_VertextAI_Example.ipynb (vendored)
@ -43,7 +43,7 @@
"source": [
"# set you Vertex AI configs\n",
"import litellm\n",
"from litellm import embedding, completion\n",
"from litellm import completion\n",
"\n",
"litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\""
650 cookbook/liteLLM_function_calling.ipynb (vendored)
@ -1,331 +1,331 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
"  - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
"  - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
],
"metadata": {
"id": "vnvlwUDZK7VA"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KrINCwRfLgZV"
},
"outputs": [],
"source": [
"## Install liteLLM\n",
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
"    {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
"  if location == \"Boston, MA\":\n",
"    return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
"    {\n",
"      \"name\": \"get_current_weather\",\n",
"      \"description\": \"Get the current weather in a given location\",\n",
"      \"parameters\": {\n",
"        \"type\": \"object\",\n",
"        \"properties\": {\n",
"          \"location\": {\n",
"            \"type\": \"string\",\n",
"            \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
"          },\n",
"          \"unit\": {\n",
"            \"type\": \"string\",\n",
"            \"enum\": [\"celsius\", \"fahrenheit\"]\n",
"          }\n",
"        },\n",
"        \"required\": [\"location\"]\n",
"      }\n",
"    }\n",
"  ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{
"cell_type": "code",
"source": [
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": {
"id": "KrINCwRfLgZV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os, litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "nK7zR5OgLlh2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": {
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
"    {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
"  if location == \"Boston, MA\":\n",
"    return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
"    {\n",
"      \"name\": \"get_current_weather\",\n",
"      \"description\": \"Get the current weather in a given location\",\n",
"      \"parameters\": {\n",
"        \"type\": \"object\",\n",
"        \"properties\": {\n",
"          \"location\": {\n",
"            \"type\": \"string\",\n",
"            \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
"          },\n",
"          \"unit\": {\n",
"            \"type\": \"string\",\n",
"            \"enum\": [\"celsius\", \"fahrenheit\"]\n",
"          }\n",
"        },\n",
"        \"required\": [\"location\"]\n",
"      }\n",
"    }\n",
"  ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
"  \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
"  \"object\": \"chat.completion\",\n",
"  \"created\": 1691801223,\n",
"  \"model\": \"gpt-3.5-turbo-0613\",\n",
"  \"choices\": [\n",
"    {\n",
"      \"index\": 0,\n",
"      \"message\": {\n",
"        \"role\": \"assistant\",\n",
"        \"content\": null,\n",
"        \"function_call\": {\n",
"          \"name\": \"get_current_weather\",\n",
"          \"arguments\": \"{\\n  \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"        }\n",
"      },\n",
"      \"finish_reason\": \"function_call\"\n",
"    }\n",
"  ],\n",
"  \"usage\": {\n",
"    \"prompt_tokens\": 82,\n",
"    \"completion_tokens\": 18,\n",
"    \"total_tokens\": 100\n",
"  }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
"  \"name\": \"get_current_weather\",\n",
"  \"arguments\": \"{\\n  \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
"  result = get_current_weather(**function_args)\n",
"  print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
"    {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
"    {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
"    {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
"  \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
"  \"object\": \"chat.completion\",\n",
"  \"created\": 1691801963,\n",
"  \"model\": \"gpt-3.5-turbo-0613\",\n",
"  \"choices\": [\n",
"    {\n",
"      \"index\": 0,\n",
"      \"message\": {\n",
"        \"role\": \"assistant\",\n",
"        \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
"      },\n",
"      \"finish_reason\": \"stop\"\n",
"    }\n",
"  ],\n",
"  \"usage\": {\n",
"    \"prompt_tokens\": 109,\n",
"    \"completion_tokens\": 12,\n",
"    \"total_tokens\": 121\n",
"  }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
"  \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
"  \"object\": \"chat.completion\",\n",
"  \"created\": 1691801223,\n",
"  \"model\": \"gpt-3.5-turbo-0613\",\n",
"  \"choices\": [\n",
"    {\n",
"      \"index\": 0,\n",
"      \"message\": {\n",
"        \"role\": \"assistant\",\n",
"        \"content\": null,\n",
"        \"function_call\": {\n",
"          \"name\": \"get_current_weather\",\n",
"          \"arguments\": \"{\\n  \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"        }\n",
"      },\n",
"      \"finish_reason\": \"function_call\"\n",
"    }\n",
"  ],\n",
"  \"usage\": {\n",
"    \"prompt_tokens\": 82,\n",
"    \"completion_tokens\": 18,\n",
"    \"total_tokens\": 100\n",
"  }\n",
"}\n"
]
}
]
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
"  \"name\": \"get_current_weather\",\n",
"  \"arguments\": \"{\\n  \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
"  result = get_current_weather(**function_args)\n",
"  print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
"  \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
"  \"object\": \"chat.completion\",\n",
"  \"created\": 1691801963,\n",
"  \"model\": \"gpt-3.5-turbo-0613\",\n",
"  \"choices\": [\n",
"    {\n",
"      \"index\": 0,\n",
"      \"message\": {\n",
"        \"role\": \"assistant\",\n",
"        \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
"      },\n",
"      \"finish_reason\": \"stop\"\n",
"    }\n",
"  ],\n",
"  \"usage\": {\n",
"    \"prompt_tokens\": 109,\n",
"    \"completion_tokens\": 12,\n",
"    \"total_tokens\": 121\n",
"  }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
"    {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
"    {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
"    {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
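The notebook above walks the function-calling flow one cell at a time; condensed into a single round trip, it looks roughly like this (a sketch reusing the notebook's `messages`, `functions`, and `get_current_weather`):

```python
import json
from litellm import completion

# First call: let the model decide whether to call the function.
response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
message = response["choices"][0]["message"]

if message.get("function_call"):
    name = message["function_call"]["name"]
    args = json.loads(message["function_call"]["arguments"])
    result = get_current_weather(**args)  # dispatch to the local function
    # Second call: hand the result back so the model can summarize it.
    messages += [message, {"role": "function", "name": name, "content": result}]
    response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)

print(response["choices"][0]["message"]["content"])
```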
@ -1,13 +1,13 @@
import openai

api_base = f"http://0.0.0.0:8000"
api_base = "http://0.0.0.0:8000"

openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)


print(f"LiteLLM: response from proxy with streaming")
print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[
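The `create(...)` call in this hunk is cut off before its message payload and consuming loop; a sketch of how such a streaming call is typically finished under the same pre-1.0 `openai` SDK the file uses (the message text and flags below are assumptions, not this file's contents):

```python
# Hypothetical continuation of the truncated call above (pre-1.0 openai SDK).
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "hello, write a short poem"}],
    stream=True,
)
for chunk in response:
    # Streamed chunks carry incremental deltas rather than full messages.
    print(chunk["choices"][0]["delta"].get("content", ""), end="")
```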
1130 cookbook/litellm_Test_Multiple_Providers.ipynb (vendored)
File diff suppressed because one or more lines are too long
97 cookbook/litellm_model_fallback.ipynb (vendored)
@ -1,52 +1,51 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j6yJsCGeaq8G"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
"cells": [
{
"cell_type": "code",
"source": [
"!pip install litellm"
],
"metadata": {
"id": "j6yJsCGeaq8G"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
"  try:\n",
"      response = completion(model=model, messages=messages)\n",
"  except Exception as e:\n",
"      print(f\"error occurred: {traceback.format_exc()}\")"
]
}
]
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
"  try:\n",
"      response = completion(model=model, messages=messages)\n",
"  except Exception:\n",
"      print(f\"error occurred: {traceback.format_exc()}\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
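One thing the lint pass does not catch in the cell above: both the old and new versions call `traceback.format_exc()` without importing `traceback`, and the loop keeps going after a provider succeeds. A corrected sketch of the same fallback pattern:

```python
import traceback
from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
messages = [{"content": "Hello, how are you?", "role": "user"}]

for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # stop at the first provider that answers
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")
```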
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

from litellm import Router
import litellm

@ -137,7 +135,7 @@ for future in futures:
else:
failed_calls += 1

print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

from litellm import Router
import litellm

@ -160,7 +158,7 @@ for future in futures:
else:
failed_calls += 1

print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

from litellm import Router
import litellm

@ -132,7 +130,7 @@ for future in futures:
else:
failed_calls += 1

print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
@@ -1,14 +1,9 @@
from fastapi import FastAPI
import uvicorn
from memory_profiler import profile, memory_usage
from memory_profiler import profile
import os
import traceback
import asyncio
import pytest
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

@@ -1,17 +1,16 @@
#### What this tests ####

from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

@@ -1,17 +1,16 @@
#### What this tests ####

from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

@@ -1,17 +1,14 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy

load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
from litellm import Router, Timeout
from litellm import Timeout
import time
from litellm.caching.caching import Cache
import litellm
import openai

### Test just calling AsyncAzureOpenAI

@@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy

load_dotenv()
sys.path.insert(

@@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy

load_dotenv()
sys.path.insert(

@@ -1,5 +1,4 @@
import requests
import json


def get_initial_config():

@@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):

litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model)
print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue

for param, value in litellm_params.items():

@@ -1,7 +1,6 @@
import os
from openai import OpenAI
from dotenv import load_dotenv
import httpx
import concurrent.futures

load_dotenv()

@@ -2,21 +2,16 @@
import json
import boto3

import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv

load_dotenv()
import os, io
import io

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm

import io
import json


class TokenIterator:

@@ -48,7 +43,6 @@ payload = {
"stream": True,
}

import boto3

client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream(

@@ -111,7 +111,6 @@
},
"outputs": [],
"source": [
"import mlflow\n",
"mlflow.langchain.autolog()"
]
},

@@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
"""

import asyncio
import os

# Enter your DATABASE_URL here

@@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
except Exception:
# If an error occurs, the view does not exist, so create it
await db.execute_raw(
"""

@@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT

@@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT

@@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT

@@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT

@@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
)
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT

@@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT

@@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend

@@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data

return {"message": "Request received successfully"}
except Exception as e:
except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error")

@@ -2,12 +2,10 @@

#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import os

from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache

from typing import Literal, Union, Optional
from typing import Optional

import traceback

@@ -15,10 +13,8 @@ import traceback
#### What this does ####
# On success + failure, log events to Supabase

import dotenv, os
import traceback
import datetime, subprocess, sys
import litellm, uuid
import litellm
import uuid
from litellm._logging import print_verbose, verbose_logger

@@ -11,9 +11,9 @@ import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal, Any
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException

@@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str,
)
from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
import httpx
import json
from litellm.types.guardrails import GuardrailEventHooks

@@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks

"""
Use this for the post call moderation with Guardrails

@@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks

event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True:

@@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords


from typing import Optional, Literal
from typing import Literal
import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback


class _ENTERPRISE_BannedKeywords(CustomLogger):

@@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call
- check if user id part of blocked list
"""
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook")
self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data:
for m in data["messages"]:
if "content" in m and isinstance(m["content"], str):

@@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback


class _ENTERPRISE_BlockedUserList(CustomLogger):

@@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache
- check if end-user in db
"""
self.print_verbose(f"Inside Blocked User List Pre-Call Hook")
self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", ""))
if (

@@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan


from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio


class _ENTERPRISE_GoogleTextModeration(CustomLogger):

@@ -7,28 +7,24 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan

import sys, os
import sys
import os
from collections.abc import Iterable

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.types.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices,
)
from datetime import datetime
import aiohttp, asyncio

litellm.set_verbose = True

@@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy

from typing import Optional, Literal, Union
from typing import Optional, Literal
import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str

@@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation",
"audio_transcription",
]
except Exception as e:
except Exception:
self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
)

@@ -5,27 +5,19 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan

import sys, os
import sys
import os

sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger

litellm.set_verbose = True

@@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings

if await self.should_run_check(user_api_key_dict) is False:
return

@@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints
from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections

@@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(


def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta
from datetime import timedelta

if len(data) == 0:
return {

@@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings,
)
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import (
AmazonTitanConfig,
AmazonAI21Config,

@@ -1,7 +1,6 @@
import json
import logging
import os
import traceback
from datetime import datetime
from logging import Formatter

@@ -12,12 +12,11 @@ import json

# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os
from typing import Dict, List, Optional, Union
from typing import List, Optional, Union

import redis # type: ignore
import redis.asyncio as async_redis # type: ignore

import litellm
from litellm import get_secret, get_secret_str

from ._logging import verbose_logger

@@ -1,23 +1,12 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback
import uuid
from typing import Any, Literal, Optional

import dotenv
import httpx
from pydantic import BaseModel
from typing import Any, Optional

import litellm
from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import (
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse

@@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union

import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted

import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import (
exception_type,

@@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds.
"""
import concurrent

if "model" in kwargs:
kwargs.pop("model")

@@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx

import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import (
VertexAIBatchPrediction,
)
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout

@@ -11,7 +11,7 @@ import json
import os
import threading
import time
from typing import Literal, Optional, Union
from typing import Literal, Optional

import litellm
from litellm.utils import ModelResponse

@@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan

import ast
import asyncio
import hashlib
import inspect
import io
import json
import logging
import time
import traceback
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Union

from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import (

@@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params

from .base_cache import BaseCache
from .disk_cache import DiskCache
from .dual_cache import DualCache
from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache

@@ -35,13 +35,7 @@ from pydantic import BaseModel

import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import (
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.caching.caching import S3Cache
from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks,
)

@@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns:
Optional[Any]:
"""
from litellm.utils import (
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
from litellm.utils import convert_to_model_response_object

if (
call_type == CallTypes.acompletion.value

@@ -1,8 +1,6 @@
import json
from typing import TYPE_CHECKING, Any, Optional

from litellm._logging import print_verbose

from .base_cache import BaseCache

if TYPE_CHECKING:

@@ -12,7 +12,7 @@ import asyncio
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional

import litellm
from litellm._logging import print_verbose, verbose_logger

@@ -15,7 +15,6 @@ from typing import Any

import litellm
from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType

from .base_cache import BaseCache

@@ -13,7 +13,6 @@ import asyncio
import inspect
import json
import time
import traceback
from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

@@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import all_litellm_params
from litellm.types.services import ServiceTypes

from .base_cache import BaseCache

@@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs,
):
import redis

from litellm._service_logger import ServiceLogging

@@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs,
):
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery

print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index"

@@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):

def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery

# query

@@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):

async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery

from litellm.proxy.proxy_server import llm_model_list, llm_router

@@ -12,11 +12,9 @@ Has 4 methods:
import ast
import asyncio
import json
from typing import Any, Optional
from typing import Optional

import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType

from .base_cache import BaseCache

@@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs)

def get_cache(self, key, **kwargs):
import boto3
import botocore

try:

@@ -1,7 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union

from pydantic import BaseModel

@@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second,
)
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator,
)
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import (
CallTypes,

@@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx

import litellm
from litellm import client, get_secret_str
from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import (
VertexAIFilesHandler,
)
from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
from litellm.types.llms.openai import (
Batch,
CreateFileRequest,
FileContentRequest,
FileTypes,

@@ -19,10 +19,10 @@ import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate
from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters
from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import *
from litellm.utils import supports_httpx_timeout

@@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults
"""

import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from typing import TYPE_CHECKING, Any

from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
from litellm._logging import verbose_proxy_logger

if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting

@@ -21,7 +19,6 @@ else:


def squash_payloads(queue):
import json

squashed = {}
if len(queue) == 0:

@@ -4,16 +4,10 @@ import asyncio
import datetime
import os
import random
import threading
import time
import traceback
from datetime import datetime as dt
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
from datetime import timedelta
from typing import Any, Dict, List, Literal, Optional, Union

import aiohttp
import dotenv
from openai import APIError

import litellm

@@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert,
)
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import (
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
from litellm.router import Router
from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params

from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads

@@ -1261,7 +1246,7 @@ Model Info:

Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email

email_logo_url = os.getenv(

@@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types:
return

import json
from datetime import datetime

# Get the current timestamp

@@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio
from typing import Dict, List, Optional, Union

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret

@@ -6,14 +6,9 @@ import asyncio
import json
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional

import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore

@@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)

@@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject,
ArgillaItem,
ArgillaPayload,
)
from litellm.types.utils import StandardLoggingPayload

@@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
"""

import json
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Optional

from litellm._logging import verbose_logger

@@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import (
MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues,
SpanAttributes,
)

@@ -3,23 +3,8 @@ import json
import os
import uuid
from datetime import datetime, timedelta
from re import S, T
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
from typing import List, Optional

import httpx
from pydantic import BaseModel, Field

import litellm
from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger

@@ -2,15 +2,10 @@
## Log success + failure events to Braintrust

import copy
import json
import os
import threading
import traceback
import uuid
from datetime import datetime
from typing import Literal, Optional
from typing import Optional

import dotenv
import httpx
from pydantic import BaseModel

@@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.utils import get_formatted_prompt, print_verbose
from litellm.utils import print_verbose

global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback

@@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically

import asyncio
import time
from typing import List, Literal, Optional
from typing import List, Optional

import litellm
from litellm._logging import verbose_logger

@@ -1,4 +1,4 @@
from typing import List, Literal, Optional
from typing import List, Optional

from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger

@@ -1,18 +1,14 @@
#### What this does ####
# On success, logs events to Promptlayer
import os
import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union

import dotenv
from pydantic import BaseModel

from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
AdapterCompletionStreamWrapper,
EmbeddingResponse,

@@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio
import datetime
import os
import sys
import traceback
import uuid
from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union
from typing import Any, List, Optional, Union

from httpx import Response

@@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload

@@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp

import asyncio
import os
import traceback
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from httpx import Response
from typing import Any, Dict, List, Optional

import litellm
from litellm._logging import verbose_logger

@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase

import datetime
import os
import traceback
import uuid
from typing import Any

import dotenv

import litellm

@@ -2,7 +2,6 @@
Functions for sending Email Alerts
"""

import asyncio
import os
from typing import List, Optional

@@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
)
if team_id is None:
return []
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import prisma_client

if prisma_client is None:
raise Exception("Not connected to DB!")

@@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email

_team_id = webhook_event.team_id

@@ -1,15 +1,12 @@
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx
from pydantic import BaseModel, Field

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client,
httpxSpecialProvider,
)

@@ -1,27 +1,14 @@
import asyncio
import json
import os
import uuid
from datetime import datetime
from re import S
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional

import httpx
from pydantic import BaseModel, Field

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.proxy._types import CommonProxyErrors
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardLoggingPayload

if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase

@@ -1,13 +1,7 @@
import json
import os
import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union

import httpx
from pydantic import BaseModel, Field

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (

@@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload

if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase

@@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
"""
from litellm.llms.vertex_ai.vertex_llm_base import (
VertexBase,
)
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase

_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances:

@@ -3,10 +3,7 @@
import os
import traceback

import dotenv

import litellm
from litellm._logging import verbose_logger


class HeliconeLogger:

@@ -3,11 +3,9 @@

import json
import os
import traceback
import uuid
from typing import Literal, Optional

import dotenv
import httpx

import litellm

@@ -3,7 +3,6 @@
import copy
import os
import traceback
import types
from collections.abc import MutableMapping, MutableSequence, MutableSet
from typing import TYPE_CHECKING, Any, Dict, Optional, cast

@@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
Handles Key/Team Based Langfuse Logging
"""

import os
from typing import TYPE_CHECKING, Any, Dict, Optional

from packaging.version import Version

from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams

from .langfuse import LangFuseLogger, LangfuseLoggingConfig

@@ -3,14 +3,12 @@
import asyncio
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional

import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore

@@ -18,7 +16,6 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)

@@ -1,9 +1,7 @@
import traceback
import json
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import SpanAttributes
from typing import TYPE_CHECKING, Any

from typing import TYPE_CHECKING, Any, Optional, Union
from litellm.proxy._types import SpanAttributes

if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span

@@ -3,17 +3,12 @@

import json
import os
import traceback
import uuid

import dotenv
import httpx

import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,

@@ -1,7 +1,6 @@
import os
from dataclasses import dataclass
from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import litellm

@@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
ChatCompletionMessageToolCall,
EmbeddingResponse,
Function,
ImageResponse,
ModelResponse,
StandardLoggingPayload,
)

@@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime

from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

@@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime

from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

@@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value)

def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes

kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}

@@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime

from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

@@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime

from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

@@ -3,8 +3,6 @@ import os
import time
from typing import Dict, Final, List, Optional

from litellm.types.utils import ModelResponse

CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"

@@ -1,15 +1,10 @@
# used for /metrics endpoint on LiteLLM Proxy
#### What this does ####
# On success, log events to Prometheus
import os
import subprocess
import sys
import traceback
import uuid
from datetime import date, datetime, timedelta
from typing import Optional, TypedDict, Union
from datetime import datetime, timedelta
from typing import Optional

import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

@@ -2,13 +2,10 @@
Helper functions to query prometheus API
"""

import asyncio
import os
import time
from datetime import datetime, timedelta
from typing import Optional

import litellm
from litellm import get_secret
from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import (

@@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)


import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union

import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes

@@ -1,12 +1,6 @@
#### What this does ####
# On success + failure, log events to Supabase

import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import Optional

import litellm

@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase

import datetime
import os
import subprocess
import sys
import traceback

import dotenv

import litellm

@@ -1,6 +1,5 @@
import traceback

import litellm
from litellm._logging import verbose_logger

@@ -12,9 +11,7 @@ class TraceloopLogger:

def __init__(self):
try:
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
from traceloop.sdk.tracing.tracing import TracerWrapper
except ModuleNotFoundError as e:
verbose_logger.error(

@@ -39,7 +36,6 @@ class TraceloopLogger:
level="DEFAULT",
status_message=None,
):
from opentelemetry import trace
from opentelemetry.semconv.ai import SpanAttributes
from opentelemetry.trace import SpanKind, Status, StatusCode

@@ -78,7 +74,7 @@ class TraceloopLogger:
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(

@@ -173,16 +173,14 @@ except Exception:

#### What this does ####
# On success, logs events to Langfuse
import os
import traceback
from datetime import datetime


class WeightsBiasesLogger:
# Class variables or attributes
def __init__(self):
try:
import wandb
pass
except Exception:
raise Exception(
"\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"

@@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional

import anyio
import anyio.to_thread
from anyio import to_thread
from typing_extensions import ParamSpec, TypeVar

T_ParamSpec = ParamSpec("T_ParamSpec")

@@ -1,7 +1,6 @@
# What is this?
## Helper utilities
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Optional, Union

import httpx

@@ -1,6 +1,4 @@
import json
import os
import threading
import traceback
from typing import Optional

@@ -14,17 +12,14 @@ from ..exceptions import (
APIError,
AuthenticationError,
BadRequestError,
BudgetExceededError,
ContentPolicyViolationError,
ContextWindowExceededError,
NotFoundError,
OpenAIError,
PermissionDeniedError,
RateLimitError,
ServiceUnavailableError,
Timeout,
UnprocessableEntityError,
UnsupportedParamsError,
)

Some files were not shown because too many files have changed in this diff.