Merge branch 'main' into patch-1

Author: Ayub Kokabi
Date: 2025-02-24 16:37:25 +03:30 (committed by GitHub)
Commit: 095206cf0e
1230 changed files with 107036 additions and 36584 deletions

File diff suppressed because it is too large


@@ -9,3 +9,5 @@ anthropic
orjson==3.9.15
pydantic==2.7.1
google-cloud-aiplatform==1.43.0
fastapi-sso==0.10.0
uvloop==0.21.0


@@ -9,3 +9,4 @@ tests
.devcontainer
*.tgz
log.txt
docker/Dockerfile.*


@@ -20,3 +20,8 @@ REPLICATE_API_TOKEN = ""
ANTHROPIC_API_KEY = ""
# Infisical
INFISICAL_TOKEN = ""
# Development Configs
LITELLM_MASTER_KEY = "sk-1234"
DATABASE_URL = "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB = "True"
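For orientation, a proxy started with the development values above listens on port 4000 and accepts the master key as a bearer token; a minimal smoke-test sketch might look like the following (the OpenAI SDK, base URL, and model name are illustrative assumptions, not part of this commit):

```python
# Hypothetical smoke test against a local LiteLLM proxy using the dev values above.
from openai import OpenAI

client = OpenAI(
    base_url="http://0.0.0.0:4000",  # assumed local proxy address
    api_key="sk-1234",               # LITELLM_MASTER_KEY from the dev .env above
)
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",  # illustrative model; must be configured on the proxy
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)
```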


@@ -22,7 +22,7 @@
<!-- List of changes -->
## [REQUIRED] Testing - Attach a screenshot of any new tests passing locall
## [REQUIRED] Testing - Attach a screenshot of any new tests passing locally
If UI changes, send a screenshot/GIF of working UI fixes
<!-- Test procedure -->


@@ -52,6 +52,39 @@ def interpret_results(csv_file):
return markdown_table
def _get_docker_run_command_stable_release(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm_stable_release_branch-{release_version}
"""
def _get_docker_run_command(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
"""
def get_docker_run_command(release_version):
if "stable" in release_version:
return _get_docker_run_command_stable_release(release_version)
else:
return _get_docker_run_command(release_version)
if __name__ == "__main__":
csv_file = "load_test_stats.csv" # Change this to the path of your CSV file
markdown_table = interpret_results(csv_file)
@@ -79,17 +112,7 @@ if __name__ == "__main__":
start_index = latest_release.body.find("Load Test LiteLLM Proxy Results")
existing_release_body = latest_release.body[:start_index]
docker_run_command = f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
"""
docker_run_command = get_docker_run_command(release_version)
print("docker run command: ", docker_run_command)
new_release_body = (
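As a quick illustration of the dispatch added above: `get_docker_run_command` returns the stable-release image when the tag contains "stable" and the main image otherwise. A usage sketch, assuming the functions defined in this script and purely hypothetical release tags:

```python
# Hypothetical release tags, only to illustrate get_docker_run_command() above
print(get_docker_run_command("v1.61.3-stable"))  # uses ghcr.io/berriai/litellm_stable_release_branch-v1.61.3-stable
print(get_docker_run_command("v1.61.3"))         # uses ghcr.io/berriai/litellm:main-v1.61.3
```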


@@ -1,6 +1,4 @@
from locust import HttpUser, task, between, events
import json
import time
from locust import HttpUser, task, between
class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
def chat_completion(self):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
# Include any additional headers you may need for authentication, etc.
}
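For context, the hunk above comes from a Locust load-test script; a minimal, self-contained task along the same lines might look like the sketch below (the endpoint path, payload, pacing, and placeholder key are illustrative assumptions, not part of this commit):

```python
from locust import HttpUser, task, between


class ChatCompletionUser(HttpUser):
    # Assumed pacing between requests; the real script defines its own wait_time.
    wait_time = between(0.5, 1.5)

    @task
    def chat_completion(self):
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer sk-your-proxy-key",  # placeholder virtual key
        }
        payload = {
            "model": "gpt-3.5-turbo",  # illustrative model name
            "messages": [{"role": "user", "content": "Hello, how are you?"}],
        }
        # Locust posts relative to the --host flag, e.g. `locust --host http://0.0.0.0:4000`
        self.client.post("/chat/completions", json=payload, headers=headers)
```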

.github/workflows/reset_stable.yml

@@ -0,0 +1,39 @@
name: Reset litellm_stable branch
on:
release:
types: [published, created]
jobs:
update-stable-branch:
if: ${{ startsWith(github.event.release.tag_name, 'v') && !endsWith(github.event.release.tag_name, '-stable') }}
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Reset litellm_stable_release_branch branch to the release commit
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Configure Git user
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
# Fetch all branches and tags
git fetch --all
# Check if the litellm_stable_release_branch branch exists
if git show-ref --verify --quiet refs/remotes/origin/litellm_stable_release_branch; then
echo "litellm_stable_release_branch branch exists."
git checkout litellm_stable_release_branch
else
echo "litellm_stable_release_branch branch does not exist. Creating it."
git checkout -b litellm_stable_release_branch
fi
# Reset litellm_stable_release_branch branch to the release commit
git reset --hard $GITHUB_SHA
# Push the updated litellm_stable_release_branch branch
git push origin litellm_stable_release_branch --force

.github/workflows/stale.yml

@@ -0,0 +1,20 @@
name: "Stale Issue Management"
on:
schedule:
- cron: '0 0 * * *' # Runs daily at midnight UTC
workflow_dispatch:
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v8
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
stale-issue-message: "This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
stale-pr-message: "This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
days-before-stale: 90 # Revert to 60 days
days-before-close: 7 # Revert to 7 days
stale-issue-label: "stale"
operations-per-run: 1000

.gitignore

@@ -48,7 +48,7 @@ deploy/charts/litellm/charts/*
deploy/charts/*.tgz
litellm/proxy/vertex_key.json
**/.vim/
/node_modules
**/node_modules
kub.yaml
loadtest_kub.yaml
litellm/proxy/_new_secret_config.yaml
@@ -66,3 +66,14 @@ litellm/tests/langfuse.log
litellm/tests/langfuse.log
litellm/proxy/google-cloud-sdk/*
tests/llm_translation/log.txt
venv/
tests/local_testing/log.txt
.codegpt
litellm/proxy/_new_new_secret_config.yaml
litellm/proxy/custom_guardrail.py
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/model_hub.html
.mypy_cache/*
litellm/proxy/application.log


@@ -22,7 +22,7 @@ repos:
rev: 7.0.0 # The version of flake8 to use
hooks:
- id: flake8
exclude: ^litellm/tests/|^litellm/proxy/tests/
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
additional_dependencies: [flake8-print]
files: litellm/.*\.py
# - id: flake8


@@ -1,18 +1,20 @@
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/python:latest-dev
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/python:latest-dev
# Builder stage
FROM $LITELLM_BUILD_IMAGE AS builder
# Set the working directory to /app
WORKDIR /app
USER root
# Install build dependencies
RUN apt-get clean && apt-get update && \
apt-get install -y gcc python3-dev && \
rm -rf /var/lib/apt/lists/*
RUN apk update && \
apk add --no-cache gcc python3-dev openssl openssl-dev
RUN pip install --upgrade pip && \
pip install build
@@ -49,8 +51,12 @@ RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE AS runtime
# Update dependencies and clean up - handles debian security issue
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
# Ensure runtime stage runs as root
USER root
# Install runtime dependencies
RUN apk update && \
apk add --no-cache openssl
WORKDIR /app
# Copy the current directory contents into the container at /app
@@ -67,10 +73,11 @@ RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl
# Generate prisma client
RUN prisma generate
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
ENTRYPOINT ["litellm"]
ENTRYPOINT ["docker/prod_entrypoint.sh"]
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
CMD ["--port", "4000"]

README.md

@@ -64,18 +64,54 @@ import os
## set ENV variables
os.environ["OPENAI_API_KEY"] = "your-openai-key"
os.environ["COHERE_API_KEY"] = "your-cohere-key"
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
response = completion(model="openai/gpt-4o", messages=messages)
# cohere call
response = completion(model="command-nightly", messages=messages)
# anthropic call
response = completion(model="anthropic/claude-3-sonnet-20240229", messages=messages)
print(response)
```
### Response (OpenAI Format)
```json
{
"id": "chatcmpl-565d891b-a42e-4c39-8d14-82a1f5208885",
"created": 1734366691,
"model": "claude-3-sonnet-20240229",
"object": "chat.completion",
"system_fingerprint": null,
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hello! As an AI language model, I don't have feelings, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
}
}
],
"usage": {
"completion_tokens": 43,
"prompt_tokens": 13,
"total_tokens": 56,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
},
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0
}
}
```
Call any model supported by a provider, with `model=<provider_name>/<model_name>`. There might be provider-specific details here, so refer to [provider docs for more information](https://docs.litellm.ai/docs/providers)
## Async ([Docs](https://docs.litellm.ai/docs/completion/stream#async-completion))
@@ -87,7 +123,7 @@ import asyncio
async def test_get_response():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
response = await acompletion(model="gpt-3.5-turbo", messages=messages)
response = await acompletion(model="openai/gpt-4o", messages=messages)
return response
response = asyncio.run(test_get_response())
@@ -101,37 +137,63 @@ Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure,
```python
from litellm import completion
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
response = completion(model="openai/gpt-4o", messages=messages, stream=True)
for part in response:
print(part.choices[0].delta.content or "")
# claude 2
response = completion('claude-2', messages, stream=True)
response = completion('anthropic/claude-3-sonnet-20240229', messages, stream=True)
for part in response:
print(part.choices[0].delta.content or "")
print(part)
```
### Response chunk (OpenAI Format)
```json
{
"id": "chatcmpl-2be06597-eb60-4c70-9ec5-8cd2ab1b4697",
"created": 1734366925,
"model": "claude-3-sonnet-20240229",
"object": "chat.completion.chunk",
"system_fingerprint": null,
"choices": [
{
"finish_reason": null,
"index": 0,
"delta": {
"content": "Hello",
"role": "assistant",
"function_call": null,
"tool_calls": null,
"audio": null
},
"logprobs": null
}
]
}
```
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack, MLflow
LiteLLM exposes pre defined callbacks to send data to Lunary, MLflow, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (when using MLflow, no API key set up is required)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]
os.environ["OPENAI_API_KEY"] = "your-openai-key"
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
response = completion(model="openai/gpt-4o", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
@@ -200,7 +262,7 @@ echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
# Add the litellm salt key - you cannot change this after adding a model
# It is used to encrypt / decrypt your LLM API Key credentials
# We recommned - https://1password.com/password-generator/
# We recommend - https://1password.com/password-generator/
# password generator to get a random hash for litellm salt key
echo 'LITELLM_SALT_KEY="sk-1234"' > .env
@@ -241,6 +303,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [AI/ML API](https://docs.litellm.ai/docs/providers/aiml) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
@@ -280,25 +343,32 @@ curl 'http://0.0.0.0:4000/key/generate' \
To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
Here's how to modify the repo locally:
Step 1: Clone the repo
```
git clone https://github.com/BerriAI/litellm.git
```
Step 2: Navigate into the project, and install dependencies:
Step 2: Install dependencies:
```
cd litellm
poetry install -E extra_proxy -E proxy
pip install -r requirements.txt
```
Step 3: Test your change:
a. Add a pytest test within `tests/litellm/`
This folder follows the same directory structure as `litellm/`.
If a corresponding test file does not exist, create one.
b. Run the test
```
cd litellm/tests # pwd: Documents/litellm/litellm/tests
poetry run flake8
poetry run pytest .
cd tests/litellm # pwd: Documents/litellm/litellm/tests/litellm
pytest /path/to/test_file.py
```
Step 4: Submit a PR with your changes! 🚀
@@ -388,3 +458,20 @@ If you have suggestions on how to improve the code quality feel free to open an
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
<img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
</a>
## Run in Developer mode
### Services
1. Setup .env file in root
2. Run dependant services `docker-compose up db prometheus`
### Backend
1. (In root) create virtual environment `python -m venv .venv`
2. Activate virtual environment `source .venv/bin/activate`
3. Install dependencies `pip install -e ".[all]"`
4. Start proxy backend `uvicorn litellm.proxy.proxy_server:app --host localhost --port 4000 --reload`
### Frontend
1. Navigate to `ui/litellm-dashboard`
2. Install dependencies `npm install`
3. Run `npm run dev` to start the dashboard

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,423 +1,422 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "BmX0b5Ueh91v"
},
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "BmX0b5Ueh91v"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "mnveHO5dfcB0"
},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eo88QUdbiDIE"
},
"source": [
"## Completion - Quick start"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"import os, litellm"
],
"metadata": {
"id": "mnveHO5dfcB0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": {
"id": "eo88QUdbiDIE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": {
"id": "dQMkM-diiKdE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
],
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": {
"id": "4xrOPnt-oqwm"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
],
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
],
"metadata": {
"id": "lx8DbMBqoAoN"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
],
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "yB2NDOO4oxrp"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dQMkM-diiKdE"
},
"source": [
"## Completion - Streaming"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVvJDVn4g1i1"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4xrOPnt-oqwm"
},
"source": [
"## Completion - Azure, OpenAI in separate threads"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5b5taJPjvC3"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lx8DbMBqoAoN"
},
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pHYANOlOkoDh"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yB2NDOO4oxrp"
},
"source": [
"## Completion - Azure, OpenAI in the same thread"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long


@@ -1,166 +1,163 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "MbLbs1tbISk-"
},
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
],
"metadata": {
"id": "MbLbs1tbISk-"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KGhNJRUCIh1j"
},
"source": [
"## Import Batch Completion"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "LOtI43snDrSK"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Xhv92NBaIpaw"
},
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": {
"id": "KGhNJRUCIh1j"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
],
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
],
"metadata": {
"id": "Xhv92NBaIpaw"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"metadata": {},
"execution_count": 11
}
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"import os\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -1,204 +1,205 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx, json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
"nbformat": 4,
"nbformat_minor": 0
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx\n",
"import json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because it is too large


@@ -1,159 +1,157 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "eKXncoQbU_2j"
},
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
],
"metadata": {
"id": "eKXncoQbU_2j"
}
},
{
"cell_type": "markdown",
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
],
"metadata": {
"id": "ZciYaLwvuFbu"
}
},
{
"cell_type": "code",
"source": [
"pip install nemoguardrails langchain"
],
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
],
"metadata": {
"id": "vz5n00qyuKjp"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
],
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
],
"metadata": {
"id": "8A1KWKnzuxAS"
}
},
{
"cell_type": "code",
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
],
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"execution_count": null,
"outputs": []
}
]
{
"cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"outputs": [],
"source": [
"pip install nemoguardrails langchain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
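The guardrails notebook above assumes a LiteLLM proxy is already listening on `http://0.0.0.0:8000` (started with one of the `docker run` commands shown). A quick way to confirm that before layering NeMo Guardrails on top is a plain chat completion against the same endpoint — a minimal sketch using the openai>=1.0 client, with the placeholder key and the TogetherAI model name taken from the notebook:

```python
# Sanity-check the LiteLLM proxy endpoint the notebook points ChatOpenAI at.
# Assumes the proxy from the docker run command above is running on port 8000;
# the API key is a placeholder, exactly as in the notebook cells.
import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:8000",
    api_key="my-together-ai-api-key",
)

response = client.chat.completions.create(
    model="together_ai/togethercomputer/CodeLlama-13b-Instruct",
    messages=[{"role": "user", "content": "Hello! What can you do for me?"}],
)
print(response.choices[0].message.content)
```

If this returns a completion, the `ChatOpenAI(..., openai_api_base="http://0.0.0.0:8000")` wiring in the cells above should work unchanged.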

View file

@ -1,16 +1,12 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# litellm completion call
question = "which country has the highest population"

View file

@ -1,11 +1,12 @@
import traceback
from flask import Flask, request, jsonify, abort, Response
from flask import Flask, request, Response
from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
import os
import dotenv
import time
import json
dotenv.load_dotenv()
@ -20,9 +21,9 @@ verbose = True
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/
os.environ["PROMPTLAYER_API_KEY"] = (
"" # set your promptlayer key here - https://promptlayer.com/
)
# set callbacks
litellm.success_callback = ["promptlayer"]
@ -57,9 +58,9 @@ def api_completion():
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data["model"] = (
"togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
)
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [
@ -75,7 +76,7 @@ def api_completion():
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
except Exception:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE

View file

@ -1,5 +1,4 @@
import requests
from urllib.parse import urlparse, parse_qs
def get_next_url(response):

View file

@ -1,238 +1,237 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{
"cell_type": "markdown",
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
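The three cells above repeat the same `completion(..., custom_llm_provider="baseten")` call for each Baseten deployment. A small loop makes that comparison easier to rerun — a sketch that reuses only the call shown in the notebook and assumes `BASETEN_API_KEY` is already set:

```python
# Compare replies from the Baseten deployments used in this notebook.
from litellm import completion

messages = [{"content": "what does Baseten do? ", "role": "user"}]
# Version IDs from the cells above: Falcon 7B, WizardLM, MPT-7B Instruct.
version_ids = ["qvv0xeq", "q841o8w", "31dxrj3"]

for version_id in version_ids:
    try:
        response = completion(model=version_id, messages=messages, custom_llm_provider="baseten")
        print(version_id, "->", response["choices"][0]["message"]["content"][:100])
    except Exception as exc:  # keep going if one deployment is unavailable
        print(version_id, "-> error:", exc)
```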

View file

@ -1,201 +1,195 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MOhRaVnhB-0J"
},
"outputs": [],
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
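Since every cell above follows the same pattern — construct `ChatLiteLLM` with a different model name, then call it with a `HumanMessage` — the comparison can be expressed as one loop. A sketch, assuming the OpenAI, Anthropic, and Cohere keys are already set in the environment as in the cells above:

```python
# Ask several ChatLiteLLM-backed models the same question and print each reply.
from langchain.chat_models import ChatLiteLLM
from langchain.schema import HumanMessage

models = ["gpt-3.5-turbo", "claude-2", "command-nightly"]
messages = [HumanMessage(content="what model are you?")]

for model_name in models:
    chat = ChatLiteLLM(model=model_name)
    reply = chat(messages)  # returns an AIMessage, as the outputs above show
    print(f"{model_name}: {reply.content}")
```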

View file

@ -43,7 +43,7 @@
"source": [
"# set you Vertex AI configs\n",
"import litellm\n",
"from litellm import embedding, completion\n",
"from litellm import completion\n",
"\n",
"litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\""

View file

@ -1,331 +1,331 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
],
"metadata": {
"id": "vnvlwUDZK7VA"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KrINCwRfLgZV"
},
"outputs": [],
"source": [
"## Install liteLLM\n",
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{
"cell_type": "code",
"source": [
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": {
"id": "KrINCwRfLgZV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os, litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "nK7zR5OgLlh2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": {
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
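The notebook walks through the function-calling round trip step by step: ask the model which function to call, run it locally, then send the result back for a natural-language answer. The same flow can be folded into one helper — a sketch that only reuses the `completion()` call and the message shapes shown above:

```python
import json
from litellm import completion

def answer_with_function(user_question, functions, available_functions,
                         model="gpt-3.5-turbo-0613"):
    # Step 1: let the model decide which function to call.
    messages = [{"role": "user", "content": user_question}]
    response = completion(model=model, messages=messages, functions=functions)
    function_call_data = response["choices"][0]["message"]["function_call"]

    # Step 2: run the chosen local function with the model-supplied arguments.
    function_name = function_call_data["name"]
    function_args = json.loads(function_call_data["arguments"])
    result = available_functions[function_name](**function_args)

    # Step 3: send the result back so the model can phrase the final answer.
    messages.append({"role": "assistant", "content": None,
                     "function_call": {"name": function_name,
                                       "arguments": function_call_data["arguments"]}})
    messages.append({"role": "function", "name": function_name, "content": result})
    final = completion(model=model, messages=messages, functions=functions)
    return final["choices"][0]["message"]["content"]

# Example usage with the notebook's definitions:
# answer_with_function("What is the weather like in Boston?", functions,
#                      {"get_current_weather": get_current_weather})
```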

View file

@ -1 +1 @@
litellm
litellm==1.55.3

View file

@ -1,13 +1,13 @@
import openai
api_base = f"http://0.0.0.0:8000"
api_base = "http://0.0.0.0:8000"
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming")
print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages=[

File diff suppressed because one or more lines are too long

View file

@ -1,52 +1,51 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j6yJsCGeaq8G"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
"cells": [
{
"cell_type": "code",
"source": [
"!pip install litellm"
],
"metadata": {
"id": "j6yJsCGeaq8G"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception as e:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
]
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
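The loop above calls every model in `model_fallback_list` regardless of outcome. If the goal is fallback rather than a sweep, stopping at the first model that answers is usually what you want — a sketch using only the same `completion()` call and assuming the same provider keys:

```python
import traceback
from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
messages = [{"content": "Hello, how are you?", "role": "user"}]

response = None
errors = {}
for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # first model that succeeds wins
    except Exception:
        errors[model] = traceback.format_exc()

if response is not None:
    print(response["choices"][0]["message"]["content"])
else:
    print("all models failed:", list(errors))
```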

View file

@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -137,7 +135,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -160,7 +158,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -132,7 +130,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,9 @@
from fastapi import FastAPI
import uvicorn
from memory_profiler import profile, memory_usage
from memory_profiler import profile
import os
import traceback
import asyncio
import pytest
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

View file

@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

View file

@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid

View file

@ -1,17 +1,14 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
from litellm import Router, Timeout
from litellm import Timeout
import time
from litellm.caching.caching import Cache
import litellm
import openai
### Test just calling AsyncAzureOpenAI

View file

@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(

View file

@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(

View file

@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "4FbDOmcj2VkM"
},
"source": [
"## Use LiteLLM with Arize\n",
"https://docs.litellm.ai/docs/observability/arize_integration\n",
"\n",
"This method uses the litellm proxy to send the data to Arize. The callback is set in the litellm config below, instead of using OpenInference tracing."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "21W8Woog26Ns"
},
"source": [
"## Install Dependencies"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "xrjKLBxhxu2L"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: litellm in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (1.54.1)\n",
"Requirement already satisfied: aiohttp in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (3.11.10)\n",
"Requirement already satisfied: click in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (8.1.7)\n",
"Requirement already satisfied: httpx<0.28.0,>=0.23.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (0.27.2)\n",
"Requirement already satisfied: importlib-metadata>=6.8.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (8.5.0)\n",
"Requirement already satisfied: jinja2<4.0.0,>=3.1.2 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (3.1.4)\n",
"Requirement already satisfied: jsonschema<5.0.0,>=4.22.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (4.23.0)\n",
"Requirement already satisfied: openai>=1.55.3 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (1.57.1)\n",
"Requirement already satisfied: pydantic<3.0.0,>=2.0.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (2.10.3)\n",
"Requirement already satisfied: python-dotenv>=0.2.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (1.0.1)\n",
"Requirement already satisfied: requests<3.0.0,>=2.31.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (2.32.3)\n",
"Requirement already satisfied: tiktoken>=0.7.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (0.7.0)\n",
"Requirement already satisfied: tokenizers in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from litellm) (0.21.0)\n",
"Requirement already satisfied: anyio in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.23.0->litellm) (4.7.0)\n",
"Requirement already satisfied: certifi in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.23.0->litellm) (2024.8.30)\n",
"Requirement already satisfied: httpcore==1.* in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.23.0->litellm) (1.0.7)\n",
"Requirement already satisfied: idna in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.23.0->litellm) (3.10)\n",
"Requirement already satisfied: sniffio in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpx<0.28.0,>=0.23.0->litellm) (1.3.1)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from httpcore==1.*->httpx<0.28.0,>=0.23.0->litellm) (0.14.0)\n",
"Requirement already satisfied: zipp>=3.20 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from importlib-metadata>=6.8.0->litellm) (3.21.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from jinja2<4.0.0,>=3.1.2->litellm) (3.0.2)\n",
"Requirement already satisfied: attrs>=22.2.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (24.2.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (2024.10.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (0.35.1)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (0.22.3)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from openai>=1.55.3->litellm) (1.9.0)\n",
"Requirement already satisfied: jiter<1,>=0.4.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from openai>=1.55.3->litellm) (0.6.1)\n",
"Requirement already satisfied: tqdm>4 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from openai>=1.55.3->litellm) (4.67.1)\n",
"Requirement already satisfied: typing-extensions<5,>=4.11 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from openai>=1.55.3->litellm) (4.12.2)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.0.0->litellm) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.27.1 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.0.0->litellm) (2.27.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->litellm) (3.4.0)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->litellm) (2.0.7)\n",
"Requirement already satisfied: regex>=2022.1.18 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from tiktoken>=0.7.0->litellm) (2024.11.6)\n",
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (2.4.4)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (1.3.1)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (1.5.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (6.1.0)\n",
"Requirement already satisfied: propcache>=0.2.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (0.2.1)\n",
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from aiohttp->litellm) (1.18.3)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from tokenizers->litellm) (0.26.5)\n",
"Requirement already satisfied: filelock in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (3.16.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (2024.10.0)\n",
"Requirement already satisfied: packaging>=20.9 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (24.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /Users/ericxiao/Documents/arize/.venv/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (6.0.2)\n"
]
}
],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jHEu-TjZ29PJ"
},
"source": [
"## Set Env Variables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "QWd9rTysxsWO"
},
"outputs": [],
"source": [
"import litellm\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"ARIZE_SPACE_KEY\"] = getpass(\"Enter your Arize space key: \")\n",
"os.environ[\"ARIZE_API_KEY\"] = getpass(\"Enter your Arize API key: \")\n",
"os.environ['OPENAI_API_KEY']= getpass(\"Enter your OpenAI API key: \")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's run a completion call and see the traces in Arize"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello! Nice to meet you, OpenAI. How can I assist you today?\n"
]
}
],
"source": [
"# set arize as a callback, litellm will send the data to arize\n",
"litellm.callbacks = [\"arize\"]\n",
" \n",
"# openai call\n",
"response = litellm.completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Hi 👋 - i'm openai\"}\n",
" ]\n",
")\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
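The same Arize-instrumented call also works asynchronously. A sketch with `litellm.acompletion`, assuming the `ARIZE_SPACE_KEY`, `ARIZE_API_KEY`, and `OPENAI_API_KEY` variables set above:

```python
import asyncio
import litellm

litellm.callbacks = ["arize"]  # same callback as in the notebook

async def main():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```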

View file

@ -0,0 +1,252 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LLM Ops Stack - LiteLLM Proxy + Langfuse \n",
"\n",
"This notebook demonstrates how to use LiteLLM Proxy with Langfuse \n",
"- Use LiteLLM Proxy for calling 100+ LLMs in OpenAI format\n",
"- Use Langfuse for viewing request / response traces \n",
"\n",
"\n",
"In this notebook we will setup LiteLLM Proxy to make requests to OpenAI, Anthropic, Bedrock and automatically log traces to Langfuse."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup LiteLLM Proxy\n",
"\n",
"### 1.1 Define .env variables \n",
"Define .env variables on the container that litellm proxy is running on.\n",
"```bash\n",
"## LLM API Keys\n",
"OPENAI_API_KEY=sk-proj-1234567890\n",
"ANTHROPIC_API_KEY=sk-ant-api03-1234567890\n",
"AWS_ACCESS_KEY_ID=1234567890\n",
"AWS_SECRET_ACCESS_KEY=1234567890\n",
"\n",
"## Langfuse Logging \n",
"LANGFUSE_PUBLIC_KEY=\"pk-lf-xxxx9\"\n",
"LANGFUSE_SECRET_KEY=\"sk-lf-xxxx9\"\n",
"LANGFUSE_HOST=\"https://us.cloud.langfuse.com\"\n",
"```\n",
"\n",
"\n",
"### 1.1 Setup LiteLLM Proxy Config yaml \n",
"```yaml\n",
"model_list:\n",
" - model_name: gpt-4o\n",
" litellm_params:\n",
" model: openai/gpt-4o\n",
" api_key: os.environ/OPENAI_API_KEY\n",
" - model_name: claude-3-5-sonnet-20241022\n",
" litellm_params:\n",
" model: anthropic/claude-3-5-sonnet-20241022\n",
" api_key: os.environ/ANTHROPIC_API_KEY\n",
" - model_name: us.amazon.nova-micro-v1:0\n",
" litellm_params:\n",
" model: bedrock/us.amazon.nova-micro-v1:0\n",
" aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID\n",
" aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY\n",
"\n",
"litellm_settings:\n",
" callbacks: [\"langfuse\"]\n",
"\n",
"\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Make LLM Requests to LiteLLM Proxy\n",
"\n",
"Now we will make our first LLM request to LiteLLM Proxy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.1 Setup Client Side Variables to point to LiteLLM Proxy\n",
"Set `LITELLM_PROXY_BASE_URL` to the base url of the LiteLLM Proxy and `LITELLM_VIRTUAL_KEY` to the virtual key you want to use for Authentication to LiteLLM Proxy. (Note: In this initial setup you can)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"\n",
"LITELLM_PROXY_BASE_URL=\"http://0.0.0.0:4000\"\n",
"LITELLM_VIRTUAL_KEY=\"sk-oXXRa1xxxxxxxxxxx\""
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ChatCompletion(id='chatcmpl-B0sq6QkOKNMJ0dwP3x7OoMqk1jZcI', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Langfuse is a platform designed to monitor, observe, and troubleshoot AI and large language model (LLM) applications. It provides features that help developers gain insights into how their AI systems are performing, make debugging easier, and optimize the deployment of models. Langfuse allows for tracking of model interactions, collecting telemetry, and visualizing data, which is crucial for understanding the behavior of AI models in production environments. This kind of tool is particularly useful for developers working with language models who need to ensure reliability and efficiency in their applications.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739550502, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_523b9b6e5f', usage=CompletionUsage(completion_tokens=109, prompt_tokens=13, total_tokens=122, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import openai\n",
"client = openai.OpenAI(\n",
" api_key=LITELLM_VIRTUAL_KEY,\n",
" base_url=LITELLM_PROXY_BASE_URL\n",
")\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"gpt-4o\",\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what is Langfuse?\"\n",
" }\n",
" ],\n",
")\n",
"\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 View Traces on Langfuse\n",
"LiteLLM will send the request / response, model, tokens (input + output), cost to Langfuse.\n",
"\n",
"![image_description](litellm_proxy_langfuse.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.4 Call Anthropic, Bedrock models \n",
"\n",
"Now we can call `us.amazon.nova-micro-v1:0` and `claude-3-5-sonnet-20241022` models defined on your config.yaml both in the OpenAI request / response format."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ChatCompletion(id='chatcmpl-7756e509-e61f-4f5e-b5ae-b7a41013522a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Langfuse is an observability tool designed specifically for machine learning models and applications built with natural language processing (NLP) and large language models (LLMs). It focuses on providing detailed insights into how these models perform in real-world scenarios. Here are some key features and purposes of Langfuse:\\n\\n1. **Real-time Monitoring**: Langfuse allows developers to monitor the performance of their NLP and LLM applications in real time. This includes tracking the inputs and outputs of the models, as well as any errors or issues that arise during operation.\\n\\n2. **Error Tracking**: It helps in identifying and tracking errors in the models' outputs. By analyzing incorrect or unexpected responses, developers can pinpoint where and why errors occur, facilitating more effective debugging and improvement.\\n\\n3. **Performance Metrics**: Langfuse provides various performance metrics, such as latency, throughput, and error rates. These metrics help developers understand how well their models are performing under different conditions and workloads.\\n\\n4. **Traceability**: It offers detailed traceability of requests and responses, allowing developers to follow the path of a request through the system and see how it is processed by the model at each step.\\n\\n5. **User Feedback Integration**: Langfuse can integrate user feedback to provide context for model outputs. This helps in understanding how real users are interacting with the model and how its outputs align with user expectations.\\n\\n6. **Customizable Dashboards**: Users can create custom dashboards to visualize the data collected by Langfuse. These dashboards can be tailored to highlight the most important metrics and insights for a specific application or team.\\n\\n7. **Alerting and Notifications**: It can set up alerts for specific conditions or errors, notifying developers when something goes wrong or when performance metrics fall outside of acceptable ranges.\\n\\nBy providing comprehensive observability for NLP and LLM applications, Langfuse helps developers to build more reliable, accurate, and user-friendly models and services.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739554005, model='us.amazon.nova-micro-v1:0', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=380, prompt_tokens=5, total_tokens=385, completion_tokens_details=None, prompt_tokens_details=None))"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import openai\n",
"client = openai.OpenAI(\n",
" api_key=LITELLM_VIRTUAL_KEY,\n",
" base_url=LITELLM_PROXY_BASE_URL\n",
")\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"us.amazon.nova-micro-v1:0\",\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what is Langfuse?\"\n",
" }\n",
" ],\n",
")\n",
"\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Advanced - Set Langfuse Trace ID, Tags, Metadata \n",
"\n",
"Here is an example of how you can set Langfuse specific params on your client side request. See full list of supported langfuse params [here](https://docs.litellm.ai/docs/observability/langfuse_integration)\n",
"\n",
"You can view the logged trace of this request [here](https://us.cloud.langfuse.com/project/clvlhdfat0007vwb74m9lvfvi/traces/567890?timestamp=2025-02-14T17%3A30%3A26.709Z)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ChatCompletion(id='chatcmpl-789babd5-c064-4939-9093-46e4cd2e208a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Langfuse is an observability platform designed specifically for monitoring and improving the performance of natural language processing (NLP) models and applications. It provides developers with tools to track, analyze, and optimize how their language models interact with users and handle natural language inputs.\\n\\nHere are some key features and benefits of Langfuse:\\n\\n1. **Real-Time Monitoring**: Langfuse allows developers to monitor their NLP applications in real time. This includes tracking user interactions, model responses, and overall performance metrics.\\n\\n2. **Error Tracking**: It helps in identifying and tracking errors in the model's responses. This can include incorrect, irrelevant, or unsafe outputs.\\n\\n3. **User Feedback Integration**: Langfuse enables the collection of user feedback directly within the platform. This feedback can be used to identify areas for improvement in the model's performance.\\n\\n4. **Performance Metrics**: The platform provides detailed metrics and analytics on model performance, including latency, throughput, and accuracy.\\n\\n5. **Alerts and Notifications**: Developers can set up alerts to notify them of any significant issues or anomalies in model performance.\\n\\n6. **Debugging Tools**: Langfuse offers tools to help developers debug and refine their models by providing insights into how the model processes different types of inputs.\\n\\n7. **Integration with Development Workflows**: It integrates seamlessly with various development environments and CI/CD pipelines, making it easier to incorporate observability into the development process.\\n\\n8. **Customizable Dashboards**: Users can create custom dashboards to visualize the data in a way that best suits their needs.\\n\\nLangfuse aims to help developers build more reliable, accurate, and user-friendly NLP applications by providing them with the tools to observe and improve how their models perform in real-world scenarios.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739554281, model='us.amazon.nova-micro-v1:0', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=346, prompt_tokens=5, total_tokens=351, completion_tokens_details=None, prompt_tokens_details=None))"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import openai\n",
"client = openai.OpenAI(\n",
" api_key=LITELLM_VIRTUAL_KEY,\n",
" base_url=LITELLM_PROXY_BASE_URL\n",
")\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"us.amazon.nova-micro-v1:0\",\n",
" messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what is Langfuse?\"\n",
" }\n",
" ],\n",
" extra_body={\n",
" \"metadata\": {\n",
" \"generation_id\": \"1234567890\",\n",
" \"trace_id\": \"567890\",\n",
" \"trace_user_id\": \"user_1234567890\",\n",
" \"tags\": [\"tag1\", \"tag2\"]\n",
" }\n",
" }\n",
")\n",
"\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## "
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.


View file

@ -1,5 +1,4 @@
import requests
import json
def get_initial_config():

View file

@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model)
print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue
for param, value in litellm_params.items():

View file

@ -1,7 +1,6 @@
import os
from openai import OpenAI
from dotenv import load_dotenv
import httpx
import concurrent.futures
load_dotenv()

View file

@ -2,21 +2,16 @@
import json
import boto3
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
import io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True,
}
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream(

View file

@ -0,0 +1,54 @@
import json
# List of models to update
models_to_update = [
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
"gpt-4o",
"gpt-4o-2024-11-20",
"gpt-4o-2024-08-06",
"gpt-4o-2024-05-13",
"text-embedding-3-small",
"text-embedding-3-large",
"text-embedding-ada-002-v2",
"ft:gpt-4o-2024-08-06",
"ft:gpt-4o-mini-2024-07-18",
"ft:gpt-3.5-turbo",
"ft:davinci-002",
"ft:babbage-002",
]
def update_model_prices(file_path):
# Read the JSON file as text first to preserve number formatting
with open(file_path, "r") as file:
original_text = file.read()
data = json.loads(original_text)
# Update specified models
for model_name in models_to_update:
print("finding model", model_name)
if model_name in data:
print("found model")
model = data[model_name]
if "input_cost_per_token" in model:
# Format new values to match original style
model["input_cost_per_token_batches"] = float(
"{:.12f}".format(model["input_cost_per_token"] / 2)
)
if "output_cost_per_token" in model:
model["output_cost_per_token_batches"] = float(
"{:.12f}".format(model["output_cost_per_token"] / 2)
)
print("new pricing for model=")
# Convert all float values to full decimal format before printing
formatted_model = {
k: "{:.9f}".format(v) if isinstance(v, float) else v
for k, v in data[model_name].items()
}
print(json.dumps(formatted_model, indent=4))
# Run the update
file_path = "model_prices_and_context_window.json"
update_model_prices(file_path)

View file

@ -111,7 +111,6 @@
},
"outputs": [],
"source": [
"import mlflow\n",
"mlflow.langchain.autolog()"
]
},

View file

@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
"""
import asyncio
import os
# Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
except Exception:
# If an error occurs, the view does not exist, so create it
await db.execute_raw(
"""
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
)
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT
@ -169,11 +168,11 @@ async def check_view_exists(): # noqa: PLR0915
print("MonthlyGlobalSpendPerUserPerKey Created!") # noqa
try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
await db.query_raw("""SELECT 1 FROM "DailyTagSpend" LIMIT 1""")
print("DailyTagSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS
CREATE OR REPLACE VIEW "DailyTagSpend" AS
SELECT
jsonb_array_elements_text(request_tags) AS individual_request_tag,
DATE(s."startTime") AS spend_date,
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend

View file

@ -0,0 +1,15 @@
fullnameOverride: ""
# Disable database deployment and configuration
db:
deployStandalone: false
useExisting: false
# Test environment variables
envVars:
DD_ENV: "dev_helm"
DD_SERVICE: "litellm"
USE_DDTRACE: "true"
# Disable migration job since we're not using a database
migrationJob:
enabled: false

View file

@ -91,6 +91,12 @@ spec:
name: {{ include "redis.secretName" .Subcharts.redis }}
key: {{include "redis.secretPasswordKey" .Subcharts.redis }}
{{- end }}
{{- if .Values.envVars }}
{{- range $key, $val := .Values.envVars }}
- name: {{ $key }}
value: {{ $val | quote }}
{{- end }}
{{- end }}
envFrom:
{{- range .Values.environmentSecrets }}
- secretRef:

View file

@ -1,19 +1,27 @@
{{- if .Values.migrationJob.enabled }}
# This job runs the prisma migrations for the LiteLLM DB.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "litellm.fullname" . }}-migrations
annotations:
argocd.argoproj.io/hook: PreSync
argocd.argoproj.io/hook-delete-policy: Never # keep this resource so we can debug status on ArgoCD
argocd.argoproj.io/hook-delete-policy: BeforeHookCreation # delete old migration on a new deploy in case the migration needs to make updates
checksum/config: {{ toYaml .Values | sha256sum }}
spec:
template:
metadata:
annotations:
{{- with .Values.migrationJob.annotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
containers:
- name: prisma-migrations
image: ghcr.io/berriai/litellm-database:main-latest
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
command: ["python", "litellm/proxy/prisma_migration.py"]
workingDir: "/app"
env:
@ -42,3 +50,4 @@ spec:
value: "false" # always run the migration from the Helm PreSync hook, override the value set
restartPolicy: OnFailure
backoffLimit: {{ .Values.migrationJob.backoffLimit }}
{{- end }}

View file

@ -10,6 +10,16 @@ spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness']
restartPolicy: Never
command: ['sh', '-c']
args:
- |
# Wait for a bit to allow the service to be ready
sleep 10
# Try multiple times with a delay between attempts
for i in $(seq 1 30); do
wget -T 5 "{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness" && exit 0
echo "Attempt $i failed, waiting..."
sleep 2
done
exit 1
restartPolicy: Never

View file

@ -0,0 +1,43 @@
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "litellm.fullname" . }}-env-test"
labels:
{{- include "litellm.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: test
image: busybox
command: ['sh', '-c']
args:
- |
# Test DD_ENV
if [ "$DD_ENV" != "dev_helm" ]; then
echo "❌ Environment variable DD_ENV mismatch. Expected: dev_helm, Got: $DD_ENV"
exit 1
fi
echo "✅ Environment variable DD_ENV matches expected value: $DD_ENV"
# Test DD_SERVICE
if [ "$DD_SERVICE" != "litellm" ]; then
echo "❌ Environment variable DD_SERVICE mismatch. Expected: litellm, Got: $DD_SERVICE"
exit 1
fi
echo "✅ Environment variable DD_SERVICE matches expected value: $DD_SERVICE"
# Test USE_DDTRACE
if [ "$USE_DDTRACE" != "true" ]; then
echo "❌ Environment variable USE_DDTRACE mismatch. Expected: true, Got: $USE_DDTRACE"
exit 1
fi
echo "✅ Environment variable USE_DDTRACE matches expected value: $USE_DDTRACE"
env:
- name: DD_ENV
value: {{ .Values.envVars.DD_ENV | quote }}
- name: DD_SERVICE
value: {{ .Values.envVars.DD_SERVICE | quote }}
- name: USE_DDTRACE
value: {{ .Values.envVars.USE_DDTRACE | quote }}
restartPolicy: Never

View file

@ -186,5 +186,11 @@ migrationJob:
retries: 3 # Number of retries for the Job in case of failure
backoffLimit: 4 # Backoff limit for Job restarts
disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0.
annotations: {}
# Additional environment variables to be added to the deployment
envVars: {
# USE_DDTRACE: "true"
}

BIN
dist/litellm-1.57.6.tar.gz vendored Normal file

Binary file not shown.

View file

@ -10,14 +10,9 @@ services:
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml <<- this is missing in the docker-compose file currently
# The below two are my suggestion
# command:
# - "--config=/app/config.yaml"
##############################################
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
###############################################
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
@ -34,6 +29,8 @@ services:
POSTGRES_DB: litellm
POSTGRES_USER: llmproxy
POSTGRES_PASSWORD: dbpassword9090
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
interval: 1s

View file

@ -11,9 +11,7 @@ FROM $LITELLM_BUILD_IMAGE AS builder
WORKDIR /app
# Install build dependencies
RUN apk update && \
apk add --no-cache gcc python3-dev musl-dev && \
rm -rf /var/cache/apk/*
RUN apk add --no-cache gcc python3-dev musl-dev
RUN pip install --upgrade pip && \
pip install build
@ -48,8 +46,11 @@ COPY --from=builder /wheels/ /wheels/
# Install the built wheel using pip; again using a wildcard if it's the only file
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
# Set your entrypoint and command
ENTRYPOINT ["litellm"]
ENTRYPOINT ["docker/prod_entrypoint.sh"]
CMD ["--port", "4000"]

View file

@ -33,6 +33,7 @@ WORKDIR /app
# Make sure your docker/entrypoint.sh is executable
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp

View file

@ -1,18 +1,20 @@
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/python:latest-dev
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/python:latest-dev
# Builder stage
FROM $LITELLM_BUILD_IMAGE AS builder
# Set the working directory to /app
WORKDIR /app
USER root
# Install build dependencies
RUN apt-get clean && apt-get update && \
apt-get install -y gcc python3-dev && \
rm -rf /var/lib/apt/lists/*
RUN apk update && \
apk add --no-cache gcc python3-dev openssl openssl-dev
RUN pip install --upgrade pip && \
pip install build
@ -38,8 +40,12 @@ RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE AS runtime
# Update dependencies and clean up - handles debian security issue
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
# Ensure runtime stage runs as root
USER root
# Install runtime dependencies
RUN apk update && \
apk add --no-cache openssl
WORKDIR /app
# Copy the current directory contents into the container at /app
@ -67,12 +73,12 @@ RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Generate prisma client
RUN prisma generate
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
# # Set your entrypoint and command
ENTRYPOINT ["litellm"]
ENTRYPOINT ["docker/prod_entrypoint.sh"]
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
# CMD ["--port", "4000", "--detailed_debug"]

View file

@ -1,21 +1,24 @@
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
ARG LITELLM_BUILD_IMAGE=python:3.13.1-slim
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
ARG LITELLM_RUNTIME_IMAGE=python:3.13.1-slim
# Builder stage
FROM $LITELLM_BUILD_IMAGE AS builder
# Set the working directory to /app
WORKDIR /app
# Set the shell to bash
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install build dependencies
RUN apt-get clean && apt-get update && \
apt-get install -y gcc python3-dev && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \
pip install build
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir build
# Copy the current directory contents into the container at /app
COPY . .
@ -39,7 +42,7 @@ RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
FROM $LITELLM_RUNTIME_IMAGE AS runtime
# Update dependencies and clean up - handles debian security issue
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy the current directory contents into the container at /app
@ -53,32 +56,42 @@ COPY --from=builder /wheels/ /wheels/
# Install the built wheel using pip; again using a wildcard if it's the only file
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
RUN pip install redisvl==0.0.7 --no-deps
# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
# ensure pyjwt is used, not jwt
RUN pip uninstall jwt -y
RUN pip uninstall PyJWT -y
RUN pip install PyJWT==2.9.0 --no-cache-dir
RUN pip install redisvl==0.0.7 --no-deps --no-cache-dir && \
pip uninstall jwt -y && \
pip uninstall PyJWT -y && \
pip install PyJWT==2.9.0 --no-cache-dir
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Generate prisma client
ENV PRISMA_BINARY_CACHE_DIR=/app/prisma
RUN mkdir -p /.cache
RUN chmod -R 777 /.cache
RUN pip install nodejs-bin
RUN pip install prisma
RUN prisma generate
### Prisma Handling for Non-Root #################################################
# Prisma allows you to specify the binary cache directory to use
ENV PRISMA_BINARY_CACHE_DIR=/nonexistent
RUN pip install --no-cache-dir nodejs-bin prisma
# Make a /non-existent folder and assign chown to nobody
RUN mkdir -p /nonexistent && \
chown -R nobody:nogroup /app && \
chown -R nobody:nogroup /nonexistent && \
chown -R nobody:nogroup /usr/local/lib/python3.13/site-packages/prisma/
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Run Prisma generate as user = nobody
USER nobody
RUN prisma generate
### End of Prisma Handling for Non-Root #########################################
EXPOSE 4000/tcp
# # Set your entrypoint and command
ENTRYPOINT ["docker/prod_entrypoint.sh"]
ENTRYPOINT ["litellm"]
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
# CMD ["--port", "4000", "--detailed_debug"]
CMD ["--port", "4000"]

View file

@ -0,0 +1,23 @@
FROM cgr.dev/chainguard/python:latest-dev
USER root
WORKDIR /app
ENV HOME=/home/litellm
ENV PATH="${HOME}/venv/bin:$PATH"
# Install runtime dependencies
RUN apk update && \
apk add --no-cache gcc python3-dev openssl openssl-dev
RUN python -m venv ${HOME}/venv
RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip
COPY requirements.txt .
RUN --mount=type=cache,target=${HOME}/.cache/pip \
${HOME}/venv/bin/pip install -r requirements.txt
EXPOSE 4000/tcp
ENTRYPOINT ["litellm"]
CMD ["--port", "4000"]

View file

@ -0,0 +1,9 @@
# Docker to build LiteLLM Proxy from the litellm pip package
### When to use this?
If you need to build LiteLLM Proxy from the litellm pip package, you can use this Dockerfile as a reference.
### Why build from the pip package?
- If your company has strict requirements around security / building images, you can follow the steps outlined here

View file

@ -0,0 +1,9 @@
model_list:
- model_name: "gpt-4"
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
general_settings:
alerting: ["slack"]

View file

@ -0,0 +1,5 @@
litellm[proxy] # Specify the litellm version you want to use
prometheus_client
langfuse
prisma
ddtrace==2.19.0 # for advanced DD tracing / profiling

View file

@ -0,0 +1,8 @@
#!/bin/sh
if [ "$USE_DDTRACE" = "true" ]; then
export DD_TRACE_OPENAI_ENABLED="False"
exec ddtrace-run litellm "$@"
else
exec litellm "$@"
fi

18
docker/tests/nonroot.yaml Normal file
View file

@ -0,0 +1,18 @@
schemaVersion: 2.0.0
metadataTest:
entrypoint: ["docker/prod_entrypoint.sh"]
user: "nobody"
workdir: "/app"
fileExistenceTests:
- name: "Prisma Folder"
path: "/usr/local/lib/python3.13/site-packages/prisma/"
shouldExist: true
uid: 65534
gid: 65534
- name: "Prisma Schema"
path: "/usr/local/lib/python3.13/site-packages/prisma/schema.prisma"
shouldExist: true
uid: 65534
gid: 65534

View file

@ -1,4 +1,4 @@
FROM python:3.10
FROM python:3.14.0a3-slim
COPY . /app
WORKDIR /app

View file

@ -1,43 +0,0 @@
# 🚅 litellm
A light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
###### observability:
* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
### Installation
```
pip install litellm
```
### Usage
```python
from litellm import completion
## set ENV variables
os.environ["OPENAI_API_KEY"] = "openai key"
os.environ["COHERE_API_KEY"] = "cohere key"
messages = [{ "content": "Hello, how are you?","role": "user"}]
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
## Why did we build liteLLM
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
* Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -0,0 +1,24 @@
# Directory Structure
When adding a new provider, you need to create a directory for the provider that follows the following structure:
```
litellm/llms/
└── provider_name/
├── completion/ # use when endpoint is equivalent to openai's `/v1/completions`
│ ├── handler.py
│ └── transformation.py
├── chat/ # use when endpoint is equivalent to openai's `/v1/chat/completions`
│ ├── handler.py
│ └── transformation.py
├── embed/ # use when endpoint is equivalent to openai's `/v1/embeddings`
│ ├── handler.py
│ └── transformation.py
├── audio_transcription/ # use when endpoint is equivalent to openai's `/v1/audio/transcriptions`
│ ├── handler.py
│ └── transformation.py
└── rerank/ # use when endpoint is equivalent to cohere's `/rerank` endpoint.
├── handler.py
└── transformation.py
```

View file

@ -0,0 +1,84 @@
# Add Rerank Provider
LiteLLM **follows the Cohere Rerank API format** for all rerank providers. Here's how to add a new rerank provider:
## 1. Create a transformation.py file
Create a config class named `<Provider><Endpoint>Config` that inherits from [`BaseRerankConfig`](https://github.com/BerriAI/litellm/blob/main/litellm/llms/base_llm/rerank/transformation.py):
```python
from litellm.types.rerank import OptionalRerankParams, RerankRequest, RerankResponse
class YourProviderRerankConfig(BaseRerankConfig):
def get_supported_cohere_rerank_params(self, model: str) -> list:
return [
"query",
"documents",
"top_n",
# ... other supported params
]
def transform_rerank_request(self, model: str, optional_rerank_params: OptionalRerankParams, headers: dict) -> dict:
# Transform request to RerankRequest spec
return rerank_request.model_dump(exclude_none=True)
def transform_rerank_response(self, model: str, raw_response: httpx.Response, ...) -> RerankResponse:
# Transform provider response to RerankResponse
return RerankResponse(**raw_response_json)
```
## 2. Register Your Provider
Add your provider to `litellm.utils.get_provider_rerank_config()`:
```python
elif litellm.LlmProviders.YOUR_PROVIDER == provider:
return litellm.YourProviderRerankConfig()
```
## 3. Add Provider to `rerank_api/main.py`
Add a code block to handle when your provider is called. Your provider should use the `base_llm_http_handler.rerank` method
```python
elif _custom_llm_provider == "your_provider":
...
response = base_llm_http_handler.rerank(
model=model,
custom_llm_provider=_custom_llm_provider,
optional_rerank_params=optional_rerank_params,
logging_obj=litellm_logging_obj,
timeout=optional_params.timeout,
api_key=dynamic_api_key or optional_params.api_key,
api_base=api_base,
_is_async=_is_async,
headers=headers or litellm.headers or {},
client=client,
model_response=model_response,
)
...
```
## 4. Add Tests
Add a test file to [`tests/llm_translation`](https://github.com/BerriAI/litellm/tree/main/tests/llm_translation)
```python
def test_basic_rerank_cohere():
response = litellm.rerank(
model="cohere/rerank-english-v3.0",
query="hello",
documents=["hello", "world"],
top_n=3,
)
print("re rank response: ", response)
assert response.id is not None
assert response.results is not None
```
## Reference PRs
- [Add Infinity Rerank](https://github.com/BerriAI/litellm/pull/7321)

View file

@ -105,4 +105,12 @@ transcript = client.audio.transcriptions.create(
)
```
</TabItem>
</Tabs>
</Tabs>
## Supported Providers
- OpenAI
- Azure
- [Fireworks AI](./providers/fireworks_ai.md#audio-transcription)
- [Groq](./providers/groq.md#speech-to-text---whisper)
- [Deepgram](./providers/deepgram.md)

View file

@ -5,6 +5,12 @@ import TabItem from '@theme/TabItem';
Covers Batches, Files
| Feature | Supported | Notes |
|-------|-------|-------|
| Supported Providers | OpenAI, Azure, Vertex | - |
| ✨ Cost Tracking | ✅ | LiteLLM Enterprise only |
| Logging | ✅ | Works across all logging integrations |
## Quick Start
- Create File for Batch Completion
@ -144,4 +150,23 @@ print("list_batches_response=", list_batches_response)
### [Vertex AI](./providers/vertex#batch-apis)
## How Cost Tracking for Batches API Works
LiteLLM tracks batch processing costs by logging two key events:
| Event Type | Description | When it's Logged |
|------------|-------------|------------------|
| `acreate_batch` | Initial batch creation | When batch request is submitted |
| `batch_success` | Final usage and cost | When batch processing completes |
Cost calculation:
- LiteLLM polls the batch status until completion
- Upon completion, it aggregates usage and costs from all responses in the output file
- Total `token` and `response_cost` reflect the combined metrics across all batch responses
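To make this concrete, here is a minimal client-side sketch of the same flow against a LiteLLM Proxy, using the OpenAI SDK's Batches API. The proxy URL, virtual key, and input file name are placeholders, not values from this repo:
```python
import time

import openai

# Placeholder proxy URL and virtual key
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Upload the .jsonl batch input file
batch_file = client.files.create(file=open("batch_input.jsonl", "rb"), purpose="batch")

# Submitting the batch is when LiteLLM logs the `acreate_batch` event
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)

# LiteLLM polls the batch status server-side; this loop just mirrors that from the client
while batch.status not in ("completed", "failed", "expired", "cancelled"):
    time.sleep(30)
    batch = client.batches.retrieve(batch.id)

# On completion, LiteLLM logs `batch_success` with the aggregated tokens and response_cost
print(batch.status, batch.request_counts)
```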
## [Swagger API Reference](https://litellm-api.up.railway.app/#/batch)

View file

@ -1,21 +1,61 @@
import Image from '@theme/IdealImage';
# Benchmarks
Benchmarks for LiteLLM Gateway (Proxy Server)
Benchmarks for LiteLLM Gateway (Proxy Server) tested against a fake OpenAI endpoint.
Locust Settings:
- 2500 Users
- 100 user Ramp Up
Use this config for testing:
**Note:** we're currently migrating to aiohttp which has 10x higher throughput. We recommend using the `aiohttp_openai/` provider for load testing.
```yaml
model_list:
- model_name: "fake-openai-endpoint"
litellm_params:
model: aiohttp_openai/any
api_base: https://your-fake-openai-endpoint.com/chat/completions
api_key: "test"
```
### 1 Instance LiteLLM Proxy
In these tests the median latency of directly calling the fake-openai-endpoint is 60ms.
| Metric | Litellm Proxy (1 Instance) |
|--------|------------------------|
| RPS | 475 |
| Median Latency (ms) | 100 |
| Latency overhead added by LiteLLM Proxy | 40ms |
<!-- <Image img={require('../img/1_instance_proxy.png')} /> -->
<!-- ## **Horizontal Scaling - 10K RPS**
<Image img={require('../img/instances_vs_rps.png')} /> -->
#### Key Findings
- Single instance: 475 RPS @ 100ms latency
- 2 LiteLLM instances: 950 RPS @ 100ms latency
- 4 LiteLLM instances: 1900 RPS @ 100ms latency
### 2 Instances
**Adding 1 instance will double the RPS and maintain the `100ms-110ms` median latency.**
| Metric | Litellm Proxy (2 Instances) |
|--------|------------------------|
| Median Latency (ms) | 100 |
| RPS | 950 |
## Basic Benchmarks
## Machine Spec used for testing
Overhead when using a Deployed Proxy vs Direct to LLM
- Latency overhead added by LiteLLM Proxy: 107ms
Each machine deploying LiteLLM had the following specs:
- 2 CPU
- 4GB RAM
| Metric | Direct to Fake Endpoint | Basic Litellm Proxy |
|--------|------------------------|---------------------|
| RPS | 1196 | 1133.2 |
| Median Latency (ms) | 33 | 140 |
## Logging Callbacks
@ -39,3 +79,9 @@ Using LangSmith has **no impact on latency, RPS compared to Basic Litellm Proxy*
| RPS | 1133.2 | 1135 |
| Median Latency (ms) | 140 | 132 |
## Locust Settings
- 2500 Users
- 100 user Ramp Up
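For reference, a minimal locustfile sketch matching these settings (the proxy URL, virtual key, and model name below are placeholders):
```python
# locustfile.py
from locust import HttpUser, task, between


class ProxyUser(HttpUser):
    # Each simulated user waits 0.5-1s between requests
    wait_time = between(0.5, 1)

    @task
    def chat_completion(self):
        # POST to the proxy's OpenAI-compatible chat completions route
        self.client.post(
            "/chat/completions",
            json={
                "model": "fake-openai-endpoint",
                "messages": [{"role": "user", "content": "hello"}],
            },
            headers={"Authorization": "Bearer sk-1234"},
        )
```
Run it with `locust -f locustfile.py --host http://0.0.0.0:4000 -u 2500 -r 100` to approximate the 2500 users / 100 ramp-up configuration above.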

View file

@ -8,6 +8,7 @@ Use `litellm.supports_function_calling(model="")` -> returns `True` if model sup
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="xai/grok-2-latest") == True
assert litellm.supports_function_calling(model="ollama/llama2") == False
```

View file

@ -44,6 +44,7 @@ Use `litellm.get_supported_openai_params()` for an updated list of params for ea
|Anthropic| ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | | | | | | |✅ | ✅ | | ✅ | ✅ | | | ✅ |
|OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ |
|Azure OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | | | ✅ |
|xAI| ✅ | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | |
|Replicate | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
|Anyscale | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|Cohere| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | |
@ -191,6 +192,10 @@ def completion(
- `top_logprobs`: *int (optional)* - An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.
- `headers`: *dict (optional)* - A dictionary of headers to be sent with the request.
- `extra_headers`: *dict (optional)* - Alternative to `headers`, used to send extra headers in LLM API request.
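As a quick illustration of the two header params above, here is a hedged sketch (the header name and model are placeholders):
```python
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = "sk-..."  # assumed to already be set

response = completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hello"}],
    # extra_headers are forwarded with the underlying LLM API request
    extra_headers={"X-Request-Source": "docs-example"},
)
print(response.choices[0].message.content)
```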
#### Deprecated Params
- `functions`: *array* - A list of functions that the model may use to generate JSON inputs. Each function should have the following properties:

View file

@ -89,6 +89,7 @@ response_format: { "type": "json_schema", "json_schema": … , "strict": true }
Works for:
- OpenAI models
- Azure OpenAI models
- xAI models (Grok-2 or later)
- Google AI Studio - Gemini models
- Vertex AI models (Gemini + Anthropic)
- Bedrock Models
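As a rough sketch of the call shape (the model name and schema are illustrative; assumes the relevant API key is set):
```python
from litellm import completion

response = completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Name a city and its country."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "city_info",
            "schema": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                    "country": {"type": "string"},
                },
                "required": ["city", "country"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    },
)
print(response.choices[0].message.content)  # JSON string matching the schema
```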

View file

@ -3,9 +3,11 @@ import TabItem from '@theme/TabItem';
# Streaming + Async
- [Streaming Responses](#streaming-responses)
- [Async Completion](#async-completion)
- [Async + Streaming Completion](#async-streaming)
| Feature | LiteLLM SDK | LiteLLM Proxy |
|---------|-------------|---------------|
| Streaming | ✅ [start here](#streaming-responses) | ✅ [start here](../proxy/user_keys#streaming) |
| Async | ✅ [start here](#async-completion) | ✅ [start here](../proxy/user_keys#streaming) |
| Async Streaming | ✅ [start here](#async-streaming) | ✅ [start here](../proxy/user_keys#streaming) |
## Streaming Responses
LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
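A minimal sketch of the pattern (assumes `OPENAI_API_KEY` is set; the model name is illustrative):
```python
from litellm import completion

response = completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "write a one-line haiku"}],
    stream=True,
)

# Each chunk follows the OpenAI streaming format; delta.content may be None
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")
```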

View file

@ -118,9 +118,11 @@ response = client.chat.completions.create(
Use `litellm.supports_vision(model="")` -> returns `True` if model supports `vision` and `False` if not
```python
assert litellm.supports_vision(model="gpt-4-vision-preview") == True
assert litellm.supports_vision(model="gemini-1.0-pro-vision") == True
assert litellm.supports_vision(model="gpt-3.5-turbo") == False
assert litellm.supports_vision(model="openai/gpt-4-vision-preview") == True
assert litellm.supports_vision(model="vertex_ai/gemini-1.0-pro-vision") == True
assert litellm.supports_vision(model="openai/gpt-3.5-turbo") == False
assert litellm.supports_vision(model="xai/grok-2-vision-latest") == True
assert litellm.supports_vision(model="xai/grok-2-latest") == False
```
</TabItem>

View file

@ -0,0 +1,47 @@
# Data Retention Policy
## LiteLLM Cloud
### Purpose
This policy outlines the requirements and controls/procedures LiteLLM Cloud has implemented to manage the retention and deletion of customer data.
### Policy
For Customers
1. Active Accounts
- Customer data is retained for as long as the customer's account is in active status. This includes data such as prompts, generated content, logs, and usage metrics.
2. Voluntary Account Closure
- Data enters an “expired” state when the account is voluntarily closed.
- Expired account data will be retained for 30 days (adjust as needed).
- After this period, the account and all related data will be permanently removed from LiteLLM Cloud systems.
- Customers who wish to voluntarily close their account should download or back up their data (manually or via available APIs) before initiating the closure process.
3. Involuntary Suspension
- If a customer account is involuntarily suspended (e.g., due to non-payment or violation of Terms of Service), there is a 14-day (adjust as needed) grace period during which the account will be inaccessible but can be reopened if the customer resolves the issues leading to suspension.
- After the grace period, if the account remains unresolved, it will be closed and the data will enter the “expired” state.
- Once data is in the “expired” state, it will be permanently removed 30 days (adjust as needed) thereafter, unless legal requirements dictate otherwise.
4. Manual Backup of Suspended Accounts
- If a customer wishes to manually back up data contained in a suspended account, they must bring the account back to good standing (by resolving payment or policy violations) to regain interface/API access.
- Data from a suspended account will not be accessible while the account is in suspension status.
- After 14 days of suspension (adjust as needed), if no resolution is reached, the account is closed and data follows the standard “expired” data removal timeline stated above.
5. Custom Retention Policies
- Enterprise customers can configure custom data retention periods based on their specific compliance and business requirements.
- Available customization options include:
- Adjusting the retention period for active data (0-365 days)
- Custom retention policies must be configured through the LiteLLM Cloud dashboard or via API
### Protection of Records
- LiteLLM Cloud takes measures to ensure that all records under its control are protected against loss, destruction, falsification, and unauthorized access or disclosure. These measures are aligned with relevant legislative, regulatory, contractual, and business obligations.
- When working with a third-party CSP, LiteLLM Cloud requests comprehensive information regarding the CSPs security mechanisms to protect data, including records stored or processed on behalf of LiteLLM Cloud.
- Cloud service providers engaged by LiteLLM Cloud must disclose their safeguarding practices for records they gather and store on LiteLLM Clouds behalf.

View file

@ -1,5 +1,25 @@
# Data Privacy and Security
At LiteLLM, **safeguarding your data privacy and security** is our top priority. We recognize the critical importance of the data you share with us and handle it with the highest level of diligence.
With LiteLLM Cloud, we handle:
- Deployment
- Scaling
- Upgrades and security patches
- Ensuring high availability
<iframe
src="https://status.litellm.ai/badge?theme=light"
width="250"
height="30"
className="inline-block dark:hidden"
style={{
colorScheme: "light",
marginTop: "5px",
}}
></iframe>
## Security Measures
### LiteLLM Cloud
@ -12,17 +32,24 @@
- Audit Logs with retention policy
- Control Allowed IP Addresses that can access your Cloud LiteLLM Instance
For security inquiries, please contact us at support@berri.ai
### Self-hosted Instances LiteLLM
- ** No data or telemetry is stored on LiteLLM Servers when you self host **
- For installation and configuration, see: [Self-hosting guided](../docs/proxy/deploy.md)
- **Telemetry** We run no telemetry when you self host LiteLLM
- **No data or telemetry is stored on LiteLLM Servers when you self-host**
- For installation and configuration, see: [Self-hosting guide](../docs/proxy/deploy.md)
- **Telemetry**: We run no telemetry when you self-host LiteLLM
For security inquiries, please contact us at support@berri.ai
## Supported data regions for LiteLLM Cloud
## **Security Certifications**
| **Certification** | **Status** |
|-------------------|-------------------------------------------------------------------------------------------------|
| SOC 2 Type I | Certified. Report available upon request on Enterprise plan. |
| SOC 2 Type II | In progress. Certificate available by April 15th, 2025 |
| ISO27001 | In progress. Certificate available by February 7th, 2025 |
## Supported Data Regions for LiteLLM Cloud
LiteLLM supports the following data regions:
@ -31,7 +58,7 @@ LiteLLM supports the following data regions:
All data, user accounts, and infrastructure are completely separated between these two regions
## Collection of personal data
## Collection of Personal Data
### For Self-hosted LiteLLM Users:
- No personal data is collected or transmitted to LiteLLM servers when you self-host our software.
@ -40,12 +67,13 @@ All data, user accounts, and infrastructure are completely separated between the
### For LiteLLM Cloud Users:
- LiteLLM Cloud tracks LLM usage data - We do not access or store the message / response content of your API requests or responses. You can see the [fields tracked here](https://github.com/BerriAI/litellm/blob/main/schema.prisma#L174)
**How to use and share the personal data**
**How to Use and Share the Personal Data**
- Only proxy admins can view their usage data, and they can only see the usage data of their organization.
- Proxy admins have the ability to invite other users / admins to their server to view their own usage data
- LiteLLM Cloud does not sell or share any usage data with any third parties.
## Cookies information, security and privacy
## Cookies Information, Security, and Privacy
### For Self-hosted LiteLLM Users:
- Cookie data remains within your own infrastructure.
@ -81,6 +109,12 @@ We value the security community's role in protecting our systems and users. To r
We'll review all reports promptly. Note that we don't currently offer a bug bounty program.
## Vulnerability Scanning
- LiteLLM runs [`grype`](https://github.com/anchore/grype) security scans on all built Docker images.
- See [`grype litellm` check on ci/cd](https://github.com/BerriAI/litellm/blob/main/.circleci/config.yml#L1099).
- Current Status: ✅ Passing. 0 High/Critical severity vulnerabilities found.
## Legal/Compliance FAQs
### Procurement Options
@ -89,35 +123,37 @@ We'll review all reports promptly. Note that we don't currently offer a bug boun
2. AWS Marketplace
3. Azure Marketplace
### Vendor Information
Legal Entity Name: Berrie AI Incorporated
Company Phone Number: 7708783106
Number of employees in the company: 2
Number of employees in security team: 2
Point of contact email address for security incidents: krrish@berri.ai
Point of contact email address for general security-related questions: krrish@berri.ai
Has the Vendor been audited / certified? Currently undergoing SOC-2 Certification from Drata
Has the Vendor been audited / certified?
- SOC 2 Type I. Certified. Report available upon request on Enterprise plan.
- SOC 2 Type II. In progress. Certificate available by April 15th, 2025.
- ISO27001. In progress. Certificate available by February 7th, 2025.
Has an information security management system been implemented? Yes - [CodeQL](https://codeql.github.com/)
Has an information security management system been implemented?
- Yes - [CodeQL](https://codeql.github.com/) and a comprehensive ISMS covering multiple security domains.
Is logging of key events - auth, creation, update changes occurring? Yes - we have [audit logs](https://docs.litellm.ai/docs/proxy/multiple_admins#1-switch-on-audit-logs)
Is logging of key events - auth, creation, update changes occurring?
- Yes - we have [audit logs](https://docs.litellm.ai/docs/proxy/multiple_admins#1-switch-on-audit-logs)
Does the Vendor have an established Cybersecurity incident management program? No
Does the Vendor have an established Cybersecurity incident management program?
- Yes, Incident Response Policy available upon request.
Not applicable - LiteLLM is self-hosted, this is the responsibility of the team hosting the proxy. We do provide [alerting](https://docs.litellm.ai/docs/proxy/alerting) and [monitoring](https://docs.litellm.ai/docs/proxy/prometheus) tools to help with this.
Does the vendor have a vulnerability disclosure policy in place? [Yes](https://github.com/BerriAI/litellm?tab=security-ov-file#security-vulnerability-reporting-guidelines)
Does the vendor perform vulnerability scans? No
Does the vendor perform vulnerability scans?
- Yes, regular vulnerability scans are conducted as detailed in the [Vulnerability Scanning](#vulnerability-scanning) section.
Signer Name: Krish Amit Dholakia
Signer Email: krrish@berri.ai
Signer Email: krrish@berri.ai

View file

@ -1,5 +1,5 @@
# Local Debugging
There's 2 ways to do local debugging - `litellm.set_verbose=True` and by passing in a custom function `completion(...logger_fn=<your_local_function>)`. Warning: Make sure to not use `set_verbose` in production. It logs API keys, which might end up in log files.
There's 2 ways to do local debugging - `litellm._turn_on_debug()` and by passing in a custom function `completion(...logger_fn=<your_local_function>)`. Warning: Make sure to not use `_turn_on_debug()` in production. It logs API keys, which might end up in log files.
## Set Verbose
@ -8,7 +8,7 @@ This is good for getting print statements for everything litellm is doing.
import litellm
from litellm import completion
litellm.set_verbose=True # 👈 this is the 1-line change you need to make
litellm._turn_on_debug() # 👈 this is the 1-line change you need to make
## set ENV variables
os.environ["OPENAI_API_KEY"] = "openai key"

View file

@ -323,6 +323,40 @@ response = embedding(
| embed-english-light-v2.0 | `embedding(model="embed-english-light-v2.0", input=["good morning from litellm", "this is another item"])` |
| embed-multilingual-v2.0 | `embedding(model="embed-multilingual-v2.0", input=["good morning from litellm", "this is another item"])` |
## NVIDIA NIM Embedding Models
### API keys
This can be set as env variables or passed as **params to litellm.embedding()**
```python
import os
os.environ["NVIDIA_NIM_API_KEY"] = "" # api key
os.environ["NVIDIA_NIM_API_BASE"] = "" # nim endpoint url
```
### Usage
```python
from litellm import embedding
import os
os.environ['NVIDIA_NIM_API_KEY'] = ""
response = embedding(
model='nvidia_nim/<model_name>',
input=["good morning from litellm"]
)
```
All models listed [here](https://build.nvidia.com/explore/retrieval) are supported:
| Model Name | Function Call |
| :--- | :--- |
| NV-Embed-QA | `embedding(model="nvidia_nim/NV-Embed-QA", input)` |
| nvidia/nv-embed-v1 | `embedding(model="nvidia_nim/nvidia/nv-embed-v1", input)` |
| nvidia/nv-embedqa-mistral-7b-v2 | `embedding(model="nvidia_nim/nvidia/nv-embedqa-mistral-7b-v2", input)` |
| nvidia/nv-embedqa-e5-v5 | `embedding(model="nvidia_nim/nvidia/nv-embedqa-e5-v5", input)` |
| nvidia/embed-qa-4 | `embedding(model="nvidia_nim/nvidia/embed-qa-4", input)` |
| nvidia/llama-3.2-nv-embedqa-1b-v1 | `embedding(model="nvidia_nim/nvidia/llama-3.2-nv-embedqa-1b-v1", input)` |
| nvidia/llama-3.2-nv-embedqa-1b-v2 | `embedding(model="nvidia_nim/nvidia/llama-3.2-nv-embedqa-1b-v2", input)` |
| snowflake/arctic-embed-l | `embedding(model="nvidia_nim/snowflake/arctic-embed-l", input)` |
| baai/bge-m3 | `embedding(model="nvidia_nim/baai/bge-m3", input)` |
## HuggingFace Embedding Models
LiteLLM supports all Feature-Extraction + Sentence Similarity Embedding models: https://huggingface.co/models?pipeline_tag=feature-extraction
@ -394,6 +428,32 @@ print(response)
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mistral-embed | `embedding(model="mistral/mistral-embed", input)` |
## Gemini AI Embedding Models
### API keys
This can be set as env variables or passed as **params to litellm.embedding()**
```python
import os
os.environ["GEMINI_API_KEY"] = ""
```
### Usage - Embedding
```python
from litellm import embedding
response = embedding(
model="gemini/text-embedding-004",
input=["good morning from litellm"],
)
print(response)
```
All models listed [here](https://ai.google.dev/gemini-api/docs/models/gemini) are supported:
| Model Name | Function Call |
| :--- | :--- |
| text-embedding-004 | `embedding(model="gemini/text-embedding-004", input)` |
## Vertex AI Embedding Models
@ -411,7 +471,7 @@ response = embedding(
print(response)
```
## Supported Models
### Supported Models
All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a0249f630a6792d49dffc2c5d9b7/model_prices_and_context_window.json#L835) are supported
| Model Name | Function Call |
@ -509,4 +569,4 @@ curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
}'
```
</TabItem>
</Tabs>
</Tabs>

View file

@ -2,63 +2,42 @@
For companies that need SSO, user management and professional support for LiteLLM Proxy
:::info
Interested in Enterprise? Schedule a meeting with us here 👉
[Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
Get free 7-day trial key [here](https://www.litellm.ai/#trial)
:::
Deploy managed LiteLLM Proxy within your VPC.
Includes all enterprise features.
[**View AWS Marketplace Listing**](https://aws.amazon.com/marketplace/pp/prodview-gdm3gswgjhgjo?sr=0-1&ref_=beagle&applicationId=AWSMPContessa)
[**Get early access**](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
This covers:
- **Enterprise Features**
- **Security**
- ✅ [SSO for Admin UI](./proxy/ui#✨-enterprise-features)
- ✅ [Audit Logs with retention policy](./proxy/enterprise#audit-logs)
- ✅ [JWT-Auth](../docs/proxy/token_auth.md)
- ✅ [Control available public, private routes (Restrict certain endpoints on proxy)](./proxy/enterprise#control-available-public-private-routes)
- ✅ [**Secret Managers** AWS Key Manager, Google Secret Manager, Azure Key](./secret)
- ✅ IP address-based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
- **Customize Logging, Guardrails, Caching per project**
- ✅ [Team Based Logging](./proxy/team_logging.md) - Allow each team to use their own Langfuse Project / custom callbacks
- ✅ [Disable Logging for a Team](./proxy/team_logging.md#disable-logging-for-a-team) - Switch off all logging for a team/project (GDPR Compliance)
- **Controlling Guardrails by Virtual Keys**
- **Spend Tracking & Data Exports**
- ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
- ✅ [Exporting LLM Logs to GCS Bucket](./proxy/bucket#🪣-logging-gcs-s3-buckets)
- ✅ [API Endpoints to get Spend Reports per Team, API Key, Customer](./proxy/cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
- **Prometheus Metrics**
- ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](./proxy/prometheus)
- ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](./proxy/prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
- **Custom Branding**
- ✅ [Custom Branding + Routes on Swagger Docs](./proxy/enterprise#swagger-docs---custom-routes--branding)
- ✅ [Public Model Hub](../docs/proxy/enterprise.md#public-model-hub)
- ✅ [Custom Email Branding](../docs/proxy/email.md#customizing-email-branding)
- [**Enterprise Features**](./proxy/enterprise)
- ✅ **Feature Prioritization**
- ✅ **Custom Integrations**
- ✅ **Professional Support - Dedicated discord + slack**
Deployment Options:
**Self-Hosted**
1. Manage Yourself - you can deploy our Docker Image or build a custom image from our pip package, and manage your own infrastructure. In this case, we would give you a license key + provide support via a dedicated support channel.
2. We Manage - you give us subscription access on your AWS/Azure/GCP account, and we manage the deployment.
**Managed**
You can use our cloud product where we setup a dedicated instance for you.
## Frequently Asked Questions
### What topics does Professional support cover and what SLAs do you offer?
### SLAs + Professional Support
Professional Support can assist with LLM/Provider integrations, deployment, upgrade management, and LLM Provider troubleshooting. We can't solve your own infrastructure-related issues, but we will guide you to fix them.
- 1 hour for Sev0 issues
- 6 hours for Sev1
- 24h for Sev2-Sev3 between 7am and 7pm PT (Monday through Saturday)
- 72h SLA for patching vulnerabilities in the software.
**We can offer custom SLAs** based on your needs and the severity of the issue.
@ -75,4 +54,8 @@ You just deploy [our docker image](https://docs.litellm.ai/docs/proxy/deploy) an
LITELLM_LICENSE="eyJ..."
```
No data leaves your environment.
No data leaves your environment.
## Data Security / Legal / Compliance FAQs
[Data Security / Legal / Compliance FAQs](./data_security.md)

View file

@ -0,0 +1,127 @@
import TabItem from '@theme/TabItem';
import Tabs from '@theme/Tabs';
# Files API
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
## Quick Start
- Upload a File
- List Files
- Retrieve File Information
- Delete File
- Get File Content
<Tabs>
<TabItem value="proxy" label="LiteLLM PROXY Server">
```bash
$ export OPENAI_API_KEY="sk-..."
$ litellm
# RUNNING on http://0.0.0.0:4000
```
**Upload a File**
```bash
curl http://localhost:4000/v1/files \
-H "Authorization: Bearer sk-1234" \
-F purpose="fine-tune" \
-F file="@mydata.jsonl"
```
**List Files**
```bash
curl http://localhost:4000/v1/files \
-H "Authorization: Bearer sk-1234"
```
**Retrieve File Information**
```bash
curl http://localhost:4000/v1/files/file-abc123 \
-H "Authorization: Bearer sk-1234"
```
**Delete File**
```bash
curl http://localhost:4000/v1/files/file-abc123 \
-X DELETE \
-H "Authorization: Bearer sk-1234"
```
**Get File Content**
```bash
curl http://localhost:4000/v1/files/file-abc123/content \
-H "Authorization: Bearer sk-1234"
```
</TabItem>
<TabItem value="sdk" label="SDK">
**Upload a File**
```python
import litellm
import os
os.environ["OPENAI_API_KEY"] = "sk-.."
file_obj = await litellm.acreate_file(
file=open("mydata.jsonl", "rb"),
purpose="fine-tune",
custom_llm_provider="openai",
)
print("Response from creating file=", file_obj)
```
**List Files**
```python
files = await litellm.alist_files(
custom_llm_provider="openai",
limit=10
)
print("files=", files)
```
**Retrieve File Information**
```python
file = await litellm.aretrieve_file(
file_id="file-abc123",
custom_llm_provider="openai"
)
print("file=", file)
```
**Delete File**
```python
response = await litellm.adelete_file(
file_id="file-abc123",
custom_llm_provider="openai"
)
print("delete response=", response)
```
**Get File Content**
```python
content = await litellm.afile_content(
file_id="file-abc123",
custom_llm_provider="openai"
)
print("file content=", content)
```
</TabItem>
</Tabs>
## **Supported Providers**:
### [OpenAI](#quick-start)
### [Azure OpenAI](./providers/azure#azure-batches-api)
### [Vertex AI](./providers/vertex#batch-apis)
## [Swagger API Reference](https://litellm-api.up.railway.app/#/files)

View file

@ -10,10 +10,12 @@ This is an Enterprise only endpoint [Get Started with Enterprise here](https://c
:::
## Supported Providers
- Azure OpenAI
- OpenAI
- Vertex AI
| Feature | Supported | Notes |
|-------|-------|-------|
| Supported Providers | OpenAI, Azure OpenAI, Vertex AI | - |
| Cost Tracking | 🟡 | [Let us know if you need this](https://github.com/BerriAI/litellm/issues) |
| Logging | ✅ | Works across all logging integrations |
Add `finetune_settings` and `files_settings` to your litellm config.yaml to use the fine-tuning endpoints.
## Example config.yaml for `finetune_settings` and `files_settings`
@ -110,58 +112,6 @@ curl http://localhost:4000/v1/fine_tuning/jobs \
</TabItem>
<TabItem value="Vertex" label="VertexAI">
<Tabs>
<TabItem value="openai" label="OpenAI Python SDK">
```python
ft_job = await client.fine_tuning.jobs.create(
model="gemini-1.0-pro-002", # Vertex model you want to fine-tune
training_file="gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl", # file_id from create file response
extra_body={"custom_llm_provider": "vertex_ai"}, # tell litellm proxy which provider to use
)
```
</TabItem>
<TabItem value="curl" label="curl (Unified API)">
```shell
curl http://localhost:4000/v1/fine_tuning/jobs \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"custom_llm_provider": "vertex_ai",
"model": "gemini-1.0-pro-002",
"training_file": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl"
}'
```
</TabItem>
<TabItem value="curl-vtx" label="curl (VertexAI API)">
:::info
Use this to create Fine tuning Jobs in [the Vertex AI API Format](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#create-tuning)
:::
```shell
curl http://localhost:4000/v1/projects/tuningJobs \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"baseModel": "gemini-1.0-pro-002",
"supervisedTuningSpec" : {
"training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl"
}
}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
### Request Body

View file

@ -80,13 +80,13 @@ except OpenAIError as e:
## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
## set env variables for logging tools (API key set up is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
@ -94,7 +94,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to MLflow, langfuse, lunary, helicone
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@ -0,0 +1,31 @@
# [BETA] Image Variations
OpenAI's `/image/variations` endpoint is now supported.
## Quick Start
```python
from litellm import image_variation
import os
# set env vars
os.environ["OPENAI_API_KEY"] = ""
os.environ["TOPAZ_API_KEY"] = ""
# openai call
response = image_variation(
model="dall-e-2", image=image_url
)
# topaz call
response = image_variation(
model="topaz/Standard V2", image=image_url
)
print(response)
```
## Supported Providers
- OpenAI
- Topaz

View file

@ -67,7 +67,7 @@ import os
os.environ["OPENAI_API_KEY"] = "your-api-key"
response = completion(
model="gpt-3.5-turbo",
model="openai/gpt-4o",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
@ -83,13 +83,27 @@ import os
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
response = completion(
model="claude-2",
model="anthropic/claude-3-sonnet-20240229",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="xai" label="xAI">
```python
from litellm import completion
import os
## set ENV variables
os.environ["XAI_API_KEY"] = "your-api-key"
response = completion(
model="xai/grok-2-latest",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="vertex" label="VertexAI">
```python
@ -101,7 +115,25 @@ os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"
response = completion(
model="chat-bison",
model="vertex_ai/gemini-1.5-pro",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="nvidia" label="NVIDIA">
```python
from litellm import completion
import os
## set ENV variables
os.environ["NVIDIA_NIM_API_KEY"] = "nvidia_api_key"
os.environ["NVIDIA_NIM_API_BASE"] = "nvidia_nim_endpoint_url"
response = completion(
model="nvidia_nim/<model_name>",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
@ -180,6 +212,42 @@ response = completion(
</Tabs>
### Response Format (OpenAI Format)
```json
{
"id": "chatcmpl-565d891b-a42e-4c39-8d14-82a1f5208885",
"created": 1734366691,
"model": "claude-3-sonnet-20240229",
"object": "chat.completion",
"system_fingerprint": null,
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hello! As an AI language model, I don't have feelings, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
}
}
],
"usage": {
"completion_tokens": 43,
"prompt_tokens": 13,
"total_tokens": 56,
"completion_tokens_details": null,
"prompt_tokens_details": {
"audio_tokens": null,
"cached_tokens": 0
},
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0
}
}
```
### Streaming
Set `stream=True` in the `completion` args.
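The call then returns an iterator of OpenAI-style chunks; a minimal sketch of consuming it (assuming `OPENAI_API_KEY` is set):

```python
from litellm import completion

response = completion(
    model="openai/gpt-4o",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    stream=True,
)

for chunk in response:  # each chunk follows the OpenAI streaming format
    content = chunk.choices[0].delta.content
    if content is not None:
        print(content, end="")
```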
@ -194,7 +262,7 @@ import os
os.environ["OPENAI_API_KEY"] = "your-api-key"
response = completion(
model="gpt-3.5-turbo",
model="openai/gpt-4o",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
@ -211,14 +279,29 @@ import os
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
response = completion(
model="claude-2",
model="anthropic/claude-3-sonnet-20240229",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
```
</TabItem>
<TabItem value="xai" label="xAI">
```python
from litellm import completion
import os
## set ENV variables
os.environ["XAI_API_KEY"] = "your-api-key"
response = completion(
model="xai/grok-2-latest",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
```
</TabItem>
<TabItem value="vertex" label="VertexAI">
```python
@ -230,7 +313,7 @@ os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"
response = completion(
model="chat-bison",
model="vertex_ai/gemini-1.5-pro",
messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
@ -238,6 +321,24 @@ response = completion(
</TabItem>
<TabItem value="nvidia" label="NVIDIA">
```python
from litellm import completion
import os
## set ENV variables
os.environ["NVIDIA_NIM_API_KEY"] = "nvidia_api_key"
os.environ["NVIDIA_NIM_API_BASE"] = "nvidia_nim_endpoint_url"
response = completion(
model="nvidia_nim/<model_name>",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
stream=True,
)
```
</TabItem>
<TabItem value="hugging" label="HuggingFace">
```python
@ -314,6 +415,32 @@ response = completion(
</Tabs>
### Streaming Response Format (OpenAI Format)
```json
{
"id": "chatcmpl-2be06597-eb60-4c70-9ec5-8cd2ab1b4697",
"created": 1734366925,
"model": "claude-3-sonnet-20240229",
"object": "chat.completion.chunk",
"system_fingerprint": null,
"choices": [
{
"finish_reason": null,
"index": 0,
"delta": {
"content": "Hello",
"role": "assistant",
"function_call": null,
"tool_calls": null,
"audio": null
},
"logprobs": null
}
]
}
```
### Exception handling
LiteLLM maps exceptions across all supported providers to the OpenAI exceptions. All our exceptions inherit from OpenAI's exception types, so any error handling you have for that should work out of the box with LiteLLM.
@ -331,21 +458,21 @@ except OpenAIError as e:
```
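For completeness, a minimal end-to-end sketch (the invalid key is deliberate, so the mapped exception is raised):

```python
import os
from openai import OpenAIError
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "bad-key"  # deliberately invalid

try:
    completion(
        model="claude-3-sonnet-20240229",
        messages=[{"content": "Hey, how's it going?", "role": "user"}],
    )
except OpenAIError as e:  # LiteLLM exceptions inherit from OpenAI's exception types
    print(e)
```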
### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to Lunary, MLflow, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (API key set up is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"]
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@ -111,5 +111,54 @@ chat.invoke(messages)
</TabItem>
</Tabs>
## Use Langchain ChatLiteLLM with MLflow
MLflow provides an open-source observability solution for ChatLiteLLM.
To enable the integration, simply call `mlflow.litellm.autolog()` in your code before invoking the model. No other setup is necessary.
```python
import mlflow
mlflow.litellm.autolog()
```
Once the auto-tracing is enabled, you can invoke `ChatLiteLLM` and see recorded traces in MLflow.
```python
import os
from langchain.chat_models import ChatLiteLLM
os.environ['OPENAI_API_KEY']="sk-..."
chat = ChatLiteLLM(model="gpt-4o-mini")
chat.invoke("Hi!")
```
## Use Langchain ChatLiteLLM with Lunary
```python
import os
from langchain.chat_models import ChatLiteLLM
from langchain.schema import HumanMessage
import litellm
os.environ["LUNARY_PUBLIC_KEY"] = "" # from https://app.lunary.ai/settings
os.environ['OPENAI_API_KEY']="sk-..."
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
chat = ChatLiteLLM(model="gpt-4o")
messages = [
HumanMessage(
content="what model are you"
)
]
chat(messages)
```
Get more details [here](../observability/lunary_integration.md)
## Use LangChain ChatLiteLLM + Langfuse
Checkout this section [here](../observability/langfuse_integration#use-langchain-chatlitellm--langfuse) for more details on how to integrate Langfuse with ChatLiteLLM.

View file

@ -25,6 +25,18 @@ Tutorial on how to get to 1K+ RPS with LiteLLM Proxy on locust
callbacks: ["prometheus"] # Enterprise LiteLLM Only - use prometheus to get metrics on your load test
```
**Use this config for testing:**
**Note:** We're currently migrating to aiohttp, which has 10x higher throughput. We recommend using the `aiohttp_openai/` provider for load testing.
```yaml
model_list:
- model_name: "fake-openai-endpoint"
litellm_params:
model: aiohttp_openai/any
api_base: https://your-fake-openai-endpoint.com/chat/completions
api_key: "test"
```
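To actually drive traffic at the proxy, a short locust script along these lines can be used (a sketch; it assumes the proxy is reachable at `http://0.0.0.0:4000` with a valid key, and that the model name matches the config above):

```python
from locust import HttpUser, task, between


class ProxyLoadTest(HttpUser):
    wait_time = between(0.5, 1)

    @task
    def chat_completion(self):
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer sk-1234",  # replace with a real proxy key
        }
        payload = {
            "model": "fake-openai-endpoint",  # model_name from the config above
            "messages": [{"role": "user", "content": "ping"}],
        }
        self.client.post("/chat/completions", json=payload, headers=headers)
```

Run it with `locust -f <your_file>.py --host http://0.0.0.0:4000` and ramp up users from the Locust UI.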
## Load Test - Fake OpenAI Endpoint
@ -46,7 +58,7 @@ litellm provides a hosted `fake-openai-endpoint` you can load test against
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
model: aiohttp_openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
@ -170,7 +182,7 @@ Use the following [prometheus metrics to debug your load tests / failures](./pro
## Machine Specifications for Running LiteLLM Proxy
👉 **Number of Replicas of LiteLLM Proxy=20** for getting 1K+ RPS
👉 **Number of Replicas of LiteLLM Proxy=4** for getting 1K+ RPS
| Service | Spec | CPUs | Memory | Architecture | Version|
| --- | --- | --- | --- | --- | --- |

View file

@ -19,6 +19,7 @@ Make an account on [Arize AI](https://app.arize.com/auth/login)
## Quick Start
Use just 2 lines of code to instantly log your responses **across all providers** with Arize
You can also use the instrumentor option instead of the callback, which you can find [here](https://docs.arize.com/arize/llm-tracing/tracing-integrations-auto/litellm).
```python
litellm.callbacks = ["arize"]
@ -28,7 +29,7 @@ import litellm
import os
os.environ["ARIZE_SPACE_KEY"] = ""
os.environ["ARIZE_API_KEY"] = "" # defaults to litellm-completion
os.environ["ARIZE_API_KEY"] = ""
# LLM API Keys
os.environ['OPENAI_API_KEY']=""

View file

@ -78,6 +78,17 @@ Following are the allowed fields in metadata, their types, and their description
* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
* `custom_attributes: Optional[dict]` - This is a dictionary of custom attributes. This is useful for additional information about the inference.
## Using a self-hosted deployment of Athina
If you are using a self-hosted deployment of Athina, you will need to set the `ATHINA_BASE_URL` environment variable to point to your self-hosted deployment.
```python
...
os.environ["ATHINA_BASE_URL"]= "http://localhost:9000"
...
```
## Support & Talk with Athina Team

View file

@ -67,7 +67,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
}'
```
## Advanced - pass Project ID
## Advanced - pass Project ID or name
<Tabs>
<TabItem value="sdk" label="SDK">
@ -79,7 +79,10 @@ response = litellm.completion(
{"role": "user", "content": "Hi 👋 - i'm openai"}
],
metadata={
"project_id": "my-special-project"
"project_id": "1234",
# passing project_name will try to find a project with that name, or create one if it doesn't exist
# if both project_id and project_name are passed, project_id will be used
# "project_name": "my-special-project"
}
)
```

View file

@ -7,11 +7,11 @@ liteLLM provides `input_callbacks`, `success_callbacks` and `failure_callbacks`,
liteLLM supports:
- [Custom Callback Functions](https://docs.litellm.ai/docs/observability/custom_callback)
- [Lunary](https://lunary.ai/docs)
- [Langfuse](https://langfuse.com/docs)
- [LangSmith](https://www.langchain.com/langsmith)
- [Helicone](https://docs.helicone.ai/introduction)
- [Traceloop](https://traceloop.com/docs)
- [Lunary](https://lunary.ai/docs)
- [Athina](https://docs.athina.ai/)
- [Sentry](https://docs.sentry.io/platforms/python/)
- [PostHog](https://posthog.com/docs/libraries/python)
@ -30,6 +30,7 @@ litellm.success_callback=["posthog", "helicone", "langfuse", "lunary", "athina"]
litellm.failure_callback=["sentry", "lunary", "langfuse"]
## set env variables
os.environ['LUNARY_PUBLIC_KEY'] = ""
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE'] = "", ""
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""

View file

@ -20,9 +20,7 @@ class MyCustomHandler(CustomLogger):
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call")
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
@ -30,9 +28,6 @@ class MyCustomHandler(CustomLogger):
print(f"On Failure")
#### ASYNC #### - for acompletion/aembeddings
async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Streaming")
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Success")
@ -127,8 +122,7 @@ from litellm import acompletion
class MyCustomHandler(CustomLogger):
#### ASYNC ####
async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Streaming")
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Success")

View file

@ -0,0 +1,176 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Humanloop
[Humanloop](https://humanloop.com/docs/v5/getting-started/overview) enables product teams to build robust AI features with LLMs, using best-in-class tooling for Evaluation, Prompt Management, and Observability.
## Getting Started
Use Humanloop to manage prompts across all LiteLLM Providers.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
import litellm
os.environ["HUMANLOOP_API_KEY"] = "" # [OPTIONAL] set here or in `.completion`
litellm.set_verbose = True # see raw request to provider
resp = litellm.completion(
model="humanloop/gpt-3.5-turbo",
prompt_id="test-chat-prompt",
prompt_variables={"user_message": "this is used"}, # [OPTIONAL]
messages=[{"role": "user", "content": "<IGNORED>"}],
# humanloop_api_key="..." ## alternative to setting env var
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: humanloop/gpt-3.5-turbo
prompt_id: "<humanloop_prompt_id>"
api_key: os.environ/OPENAI_API_KEY
```
2. Start the proxy
```bash
litellm --config config.yaml --detailed_debug
```
3. Test it!
<Tabs>
<TabItem value="curl" label="CURL">
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "THIS WILL BE IGNORED"
}
],
"prompt_variables": {
"key": "this is used"
}
}'
```
</TabItem>
<TabItem value="OpenAI Python SDK" label="OpenAI Python SDK">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"prompt_variables": { # [OPTIONAL]
"key": "this is used"
}
}
)
print(response)
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
**Expected Logs:**
```
POST Request Sent from LiteLLM:
curl -X POST \
https://api.openai.com/v1/ \
-d '{'model': 'gpt-3.5-turbo', 'messages': <YOUR HUMANLOOP PROMPT TEMPLATE>}'
```
## How to set model
### Set the model on LiteLLM
You can do `humanloop/<litellm_model_name>`
<Tabs>
<TabItem value="sdk" label="SDK">
```python
litellm.completion(
model="humanloop/gpt-3.5-turbo", # or `humanloop/anthropic/claude-3-5-sonnet`
...
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
```yaml
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: humanloop/gpt-3.5-turbo # OR humanloop/anthropic/claude-3-5-sonnet
prompt_id: <humanloop_prompt_id>
api_key: os.environ/OPENAI_API_KEY
```
</TabItem>
</Tabs>
### Set the model on Humanloop
LiteLLM will call Humanloop's `https://api.humanloop.com/v5/prompts/<your-prompt-id>` endpoint to get the prompt template.
This also returns the template model set on Humanloop.
```json
{
"template": [
{
... # your prompt template
}
],
"model": "gpt-3.5-turbo" # your template model
}
```

View file

@ -3,13 +3,6 @@ import Image from '@theme/IdealImage';
# Langsmith - Logging LLM Input/Output
:::tip
This is community maintained, Please make an issue if you run into a bug
https://github.com/BerriAI/litellm
:::
An all-in-one developer platform for every step of the application lifecycle
https://smith.langchain.com/
@ -66,7 +59,7 @@ os.environ["LANGSMITH_API_KEY"] = ""
# LLM API Keys
os.environ['OPENAI_API_KEY']=""
# set langfuse as a callback, litellm will send the data to langfuse
# set langsmith as a callback, litellm will send the data to langsmith
litellm.success_callback = ["langsmith"]
response = litellm.completion(

Some files were not shown because too many files have changed in this diff.