Merge branch 'main' into litellm_sagemaker_fix_stream

This commit is contained in:
Ishaan Jaff 2025-03-31 14:22:20 -07:00
commit 83ba96b8c6
452 changed files with 13927 additions and 3613 deletions

View file

@ -3,6 +3,18 @@ orbs:
codecov: codecov/codecov@4.0.1
node: circleci/node@5.1.0 # Add this line to declare the node orb
commands:
setup_google_dns:
steps:
- run:
name: "Configure Google DNS"
command: |
# Backup original resolv.conf
sudo cp /etc/resolv.conf /etc/resolv.conf.backup
# Add both local and Google DNS servers
echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
jobs:
local_testing:
@ -15,7 +27,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Show git commit hash
command: |
@ -134,7 +146,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Show git commit hash
command: |
@ -234,7 +246,13 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: DNS lookup for Redis host
command: |
sudo apt-get update
sudo apt-get install -y dnsutils
dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
- run:
name: Show git commit hash
command: |
@ -334,6 +352,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -388,6 +407,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -429,6 +449,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Show git commit hash
command: |
@ -479,7 +500,13 @@ jobs:
working_directory: ~/project
steps:
- checkout
- run:
name: Install PostgreSQL
command: |
sudo apt-get update
sudo apt-get install postgresql postgresql-contrib
echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
- setup_google_dns
- run:
name: Show git commit hash
command: |
@ -534,6 +561,7 @@ jobs:
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
pip install "pytest-postgresql==7.0.1"
- save_cache:
paths:
- ./venv
@ -569,7 +597,7 @@ jobs:
- litellm_proxy_unit_tests_coverage
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
docker:
- image: cimg/python:3.11
- image: cimg/python:3.13.1
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
@ -577,6 +605,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -618,6 +647,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -654,6 +684,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -696,6 +727,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -740,6 +772,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -782,6 +815,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -828,6 +862,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -872,6 +907,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -918,6 +954,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -960,6 +997,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -1002,6 +1040,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -1048,6 +1087,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -1080,6 +1120,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -1104,6 +1145,7 @@ jobs:
steps:
- checkout
- setup_google_dns
# Install Helm
- run:
name: Install Helm
@ -1173,6 +1215,7 @@ jobs:
steps:
- checkout
- setup_google_dns
- run:
name: Install Dependencies
command: |
@ -1209,6 +1252,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Python 3.9
command: |
@ -1283,6 +1327,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -1418,6 +1463,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -1542,6 +1588,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -1704,6 +1751,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -1815,6 +1863,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -1897,6 +1946,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
# Remove Docker CLI installation since it's already available in machine executor
- run:
name: Install Python 3.13
@ -1994,6 +2044,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
@ -2039,6 +2090,8 @@ jobs:
pip install "google-cloud-aiplatform==1.59.0"
pip install "anthropic==0.49.0"
pip install "langchain_mcp_adapters==0.0.5"
pip install "langchain_openai==0.2.1"
pip install "langgraph==0.3.18"
# Run pytest and generate JUnit XML report
- run:
name: Build Docker image
@ -2251,6 +2304,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Build UI
command: |
@ -2365,6 +2419,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Build Docker image
command: |
@ -2387,6 +2442,7 @@ jobs:
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Build Docker image
command: |

206
.github/workflows/publish-migrations.yml vendored Normal file
View file

@ -0,0 +1,206 @@
name: Publish Prisma Migrations
permissions:
contents: write
pull-requests: write
on:
push:
paths:
- 'schema.prisma' # Check root schema.prisma
branches:
- main
jobs:
publish-migrations:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:14
env:
POSTGRES_DB: temp_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
# Add shadow database service
postgres_shadow:
image: postgres:14
env:
POSTGRES_DB: shadow_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5433:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install Dependencies
run: |
pip install prisma
pip install python-dotenv
- name: Generate Initial Migration if None Exists
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
mkdir -p deploy/migrations
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
echo "No existing migrations found, creating baseline..."
VERSION=$(date +%Y%m%d%H%M%S)
mkdir -p deploy/migrations/${VERSION}_initial
echo "Generating initial migration..."
# Save raw output for debugging
prisma migrate diff \
--from-empty \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Raw migration file content:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Cleaning migration file..."
# Clean the file
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
# Verify the migration file
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
echo "ERROR: Migration file is empty after cleaning"
echo "Original content was:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
exit 1
fi
echo "Final migration file content:"
cat deploy/migrations/${VERSION}_initial/migration.sql
# Verify it starts with SQL
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
echo "ERROR: Migration file does not start with SQL command or comment"
echo "First line is:"
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
echo "Full content is:"
cat deploy/migrations/${VERSION}_initial/migration.sql
exit 1
fi
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
fi
- name: Compare and Generate Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create temporary migration workspace
mkdir -p temp_migrations
# Copy existing migrations (will not fail if directory is empty)
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
VERSION=$(date +%Y%m%d%H%M%S)
# Generate diff against existing migrations or empty state
prisma migrate diff \
--from-migrations temp_migrations \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > temp_migrations/migration_${VERSION}.sql
# Check if there are actual changes
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
echo "Changes detected, creating new migration"
mkdir -p deploy/migrations/${VERSION}_schema_update
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
else
echo "No schema changes detected"
exit 0
fi
- name: Verify Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create test database
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
# Apply all migrations in order to verify
for migration in deploy/migrations/*/migration.sql; do
echo "Applying migration: $migration"
psql "${SHADOW_DATABASE_URL}" -f $migration
done
# Add this step before create-pull-request to debug permissions
- name: Check Token Permissions
run: |
echo "Checking token permissions..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm/collaborators
echo "\nChecking if token can create PRs..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm
# Add this debug step before git push
- name: Debug Changed Files
run: |
echo "Files staged for commit:"
git diff --name-status --staged
echo "\nAll changed files:"
git status
- name: Create Pull Request
if: success()
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: "chore: update prisma migrations"
title: "Update Prisma Migrations"
body: |
Auto-generated migration based on schema.prisma changes.
Generated files:
- deploy/migrations/${VERSION}_schema_update/migration.sql
- deploy/migrations/${VERSION}_schema_update/README.md
branch: feat/prisma-migration-${{ env.VERSION }}
base: main
delete-branch: true
- name: Generate and Save Migrations
run: |
# Only add migration files
git add deploy/migrations/
git status # Debug what's being committed
git commit -m "chore: update prisma migrations"

53
.github/workflows/test-linting.yml vendored Normal file
View file

@ -0,0 +1,53 @@
name: LiteLLM Linting
on:
pull_request:
branches: [ main ]
jobs:
lint:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev
- name: Run Black formatting check
run: |
cd litellm
poetry run black . --check
cd ..
- name: Run Ruff linting
run: |
cd litellm
poetry run ruff check .
cd ..
- name: Run MyPy type checking
run: |
cd litellm
poetry run mypy . --ignore-missing-imports
cd ..
- name: Check for circular imports
run: |
cd litellm
poetry run python ../tests/documentation_tests/test_circular_imports.py
cd ..
- name: Check import safety
run: |
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)

35
.github/workflows/test-litellm.yml vendored Normal file
View file

@ -0,0 +1,35 @@
name: LiteLLM Mock Tests (folder - tests/litellm)
on:
pull_request:
branches: [ main ]
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev,proxy-dev --extras proxy
poetry run pip install pytest-xdist
- name: Run tests
run: |
poetry run pytest tests/litellm -x -vv -n 4

1
.gitignore vendored
View file

@ -83,4 +83,5 @@ tests/llm_translation/test_vertex_key.json
litellm/proxy/migrations/0_init/migration.sql
litellm/proxy/db/migrations/0_init/migration.sql
litellm/proxy/db/migrations/*
litellm/proxy/migrations/*config.yaml
litellm/proxy/migrations/*

View file

@ -6,44 +6,35 @@ repos:
entry: pyright
language: system
types: [python]
files: ^litellm/
files: ^(litellm/|litellm_proxy_extras/)
- id: isort
name: isort
entry: isort
language: system
types: [python]
files: litellm/.*\.py
files: (litellm/|litellm_proxy_extras/).*\.py
exclude: ^litellm/__init__.py$
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
name: black
entry: poetry run black
language: system
types: [python]
files: (litellm/|litellm_proxy_extras/).*\.py
- repo: https://github.com/pycqa/flake8
rev: 7.0.0 # The version of flake8 to use
hooks:
- id: flake8
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
additional_dependencies: [flake8-print]
files: litellm/.*\.py
# - id: flake8
# name: flake8 (router.py function length)
# files: ^litellm/router\.py$
# args: [--max-function-length=40]
# # additional_dependencies: [flake8-functions]
files: (litellm/|litellm_proxy_extras/).*\.py
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check
files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
- repo: local
hooks:
- id: check-files-match
name: Check if files match
entry: python3 ci_cd/check_files_match.py
language: system
# - id: check-file-length
# name: Check file length
# entry: python check_file_length.py
# args: ["10000"] # set your desired maximum number of lines
# language: python
# files: litellm/.*\.py
# exclude: ^litellm/tests/

View file

@ -14,6 +14,9 @@ help:
install-dev:
poetry install --with dev
install-proxy-dev:
poetry install --with dev,proxy-dev
lint: install-dev
poetry run pip install types-requests types-setuptools types-redis types-PyYAML
cd litellm && poetry run mypy . --ignore-missing-imports

View file

@ -16,9 +16,6 @@
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
</a>
<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
<img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
</a>
<a href="https://www.ycombinator.com/companies/berriai">
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
</a>

60
ci_cd/baseline_db.py Normal file
View file

@ -0,0 +1,60 @@
import subprocess
from pathlib import Path
from datetime import datetime
def create_baseline():
"""Create baseline migration in deploy/migrations"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
deploy_dir = root_dir / "deploy"
migrations_dir = deploy_dir / "migrations"
schema_path = root_dir / "schema.prisma"
# Create migrations directory
migrations_dir.mkdir(parents=True, exist_ok=True)
# Create migration_lock.toml if it doesn't exist
lock_file = migrations_dir / "migration_lock.toml"
if not lock_file.exists():
lock_file.write_text('provider = "postgresql"\n')
# Create timestamp-based migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_dir = migrations_dir / f"{timestamp}_baseline"
migration_dir.mkdir(parents=True, exist_ok=True)
# Generate migration SQL
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-empty",
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created baseline migration in {migration_dir}")
return True
except subprocess.CalledProcessError as e:
print(f"Error running prisma command: {e.stderr}")
return False
except Exception as e:
print(f"Error creating baseline migration: {str(e)}")
return False
if __name__ == "__main__":
create_baseline()
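To exercise this script locally (a sketch; it assumes the `prisma` CLI is available and `schema.prisma` sits at the repo root, as the script expects):

```bash
# Run from the repo root; writes deploy/migrations/<timestamp>_baseline/migration.sql
pip install prisma            # provides the `prisma` CLI the script shells out to
python ci_cd/baseline_db.py
```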

View file

@ -0,0 +1,19 @@
#!/bin/bash
# Exit on error
set -e
echo "🚀 Building and publishing litellm-proxy-extras"
# Navigate to litellm-proxy-extras directory
cd "$(dirname "$0")/../litellm-proxy-extras"
# Build the package
echo "📦 Building package..."
poetry build
# Publish to PyPI
echo "🌎 Publishing to PyPI..."
poetry publish
echo "✅ Done! Package published successfully"

95
ci_cd/run_migration.py Normal file
View file

@ -0,0 +1,95 @@
import os
import subprocess
from pathlib import Path
from datetime import datetime
import testing.postgresql
import shutil
def create_migration(migration_name: str = None):
"""
Create a new migration SQL file in the migrations directory by comparing
current database state with schema
Args:
migration_name (str): Name for the migration
"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
schema_path = root_dir / "schema.prisma"
# Create temporary PostgreSQL database
with testing.postgresql.Postgresql() as postgresql:
db_url = postgresql.url()
# Create temporary migrations directory next to schema.prisma
temp_migrations_dir = schema_path.parent / "migrations"
try:
# Copy existing migrations to temp directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
shutil.copytree(migrations_dir, temp_migrations_dir)
# Apply existing migrations to temp database
os.environ["DATABASE_URL"] = db_url
subprocess.run(
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
check=True,
)
# Generate diff between current database and schema
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-url",
db_url,
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
if result.stdout.strip():
# Generate timestamp and create migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_name = migration_name or "unnamed_migration"
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
migration_dir.mkdir(parents=True, exist_ok=True)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created migration in {migration_dir}")
return True
else:
print("No schema changes detected. Migration not needed.")
return False
finally:
# Clean up: remove temporary migrations directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
except subprocess.CalledProcessError as e:
print(f"Error generating migration: {e.stderr}")
return False
except Exception as e:
print(f"Error creating migration: {str(e)}")
return False
if __name__ == "__main__":
# If running directly, can optionally pass migration name as argument
import sys
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
create_migration(migration_name)
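A minimal local invocation might look like this (a sketch; it assumes `prisma` and the `testing.postgresql` package are installed and a local PostgreSQL binary is available for the throwaway database — the migration name `add_user_table` is only an illustrative placeholder):

```bash
pip install prisma "testing.postgresql"        # dependencies the script imports / shells out to
python ci_cd/run_migration.py add_user_table   # migration name argument is optional
```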

View file

@ -1,5 +1,35 @@
version: "3.11"
services:
litellm:
build:
context: .
args:
target: runtime
image: ghcr.io/berriai/litellm:main-stable
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml <<- this is missing in the docker-compose file currently
# command:
# - "--config=/app/config.yaml"
##############################################
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
- .env # Load local .env file
depends_on:
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
healthcheck: # Defines the health check configuration for the container
test: [ "CMD", "curl", "-f", "http://localhost:4000/health/liveliness || exit 1" ] # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
db:
image: postgres:16
restart: always
@ -16,3 +46,23 @@ services:
interval: 1s
timeout: 5s
retries: 10
prometheus:
image: prom/prometheus
volumes:
- prometheus_data:/prometheus
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=15d'
restart: always
volumes:
prometheus_data:
driver: local
postgres_data:
name: litellm_postgres_data # Named volume for Postgres data persistence
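The `prometheus` service above mounts `./prometheus.yml`, which is not part of this diff. A minimal sketch of what that file might contain, assuming the proxy exposes Prometheus metrics at `/metrics` on port 4000 (the `litellm` target name comes from the compose service above):

```bash
# Write a minimal prometheus.yml next to docker-compose.yml
cat > prometheus.yml <<'EOF'
global:
  scrape_interval: 15s
scrape_configs:
  - job_name: litellm-proxy
    metrics_path: /metrics
    static_configs:
      - targets: ["litellm:4000"]   # compose service name and container port
EOF
```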

View file

@ -4,21 +4,177 @@ import Image from '@theme/IdealImage';
# /mcp [BETA] - Model Context Protocol
Use Model Context Protocol with LiteLLM
## Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your `mcp_servers` with LiteLLM and all your clients can list and call available tools.
<Image
img={require('../img/litellm_mcp.png')}
img={require('../img/mcp_2.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
</p>
#### How it works
## Overview
LiteLLM exposes the following MCP endpoints:
LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
- `/mcp/tools/list` - List all available tools
- `/mcp/tools/call` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes MCP tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate MCP server
7. LiteLLM returns the tool call results to the MCP client
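For example, step 2 of this workflow can be sketched as a plain HTTP call against the endpoint listed above (assumptions: the proxy runs on `localhost:4000`, `sk-1234` is a valid virtual key, and the endpoint accepts a simple GET):

```bash
# Sketch: list the MCP tools LiteLLM exposes (step 2 above)
curl -s http://localhost:4000/mcp/tools/list \
  -H "Authorization: Bearer sk-1234"
```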
#### Usage
#### 1. Define your tools under `mcp_servers` in your config.yaml file.
LiteLLM allows you to define your tools in the `mcp_servers` section of your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml title="config.yaml" showLineNumbers
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}
```
#### 2. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
```shell title="Docker Run" showLineNumbers
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell title="litellm pip" showLineNumbers
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 3. Make an LLM API request
In this example we will do the following:
1. Use MCP client to list MCP tools on LiteLLM Proxy
2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
3. Provide the MCP tools to `gpt-4o`
4. Handle tool call from `gpt-4o`
5. Convert OpenAI tool call to MCP tool call
6. Execute tool call on MCP server
```python title="MCP Client List Tools" showLineNumbers
import asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
transform_mcp_tool_to_openai_tool,
transform_openai_tool_call_request_to_mcp_tool_call_request,
)
async def main():
# Initialize clients
# point OpenAI client to LiteLLM Proxy
client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
# Point MCP client to LiteLLM Proxy
async with sse_client("http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
# 1. List MCP tools on LiteLLM Proxy
mcp_tools = await session.list_tools()
print("List of MCP tools for MCP server:", mcp_tools.tools)
# Create message
messages = [
ChatCompletionUserMessageParam(
content="Send an email about LiteLLM supporting MCP", role="user"
)
]
# 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
# Since OpenAI only supports tools in the OpenAI format, we need to convert the MCP tools to the OpenAI format.
openai_tools = [
transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
]
# 3. Provide the MCP tools to `gpt-4o`
response = await client.chat.completions.create(
model="gpt-4o",
messages=messages,
tools=openai_tools,
tool_choice="auto",
)
# 4. Handle tool call from `gpt-4o`
if response.choices[0].message.tool_calls:
tool_call = response.choices[0].message.tool_calls[0]
if tool_call:
# 5. Convert OpenAI tool call to MCP tool call
# Since MCP servers expect tools in the MCP format, we need to convert the OpenAI tool call to the MCP format.
# This is done using litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request
mcp_call = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=tool_call.model_dump()
)
)
# 6. Execute tool call on MCP server
result = await session.call_tool(
name=mcp_call.name, arguments=mcp_call.arguments
)
print("Result:", result)
# Run it
asyncio.run(main())
```
## LiteLLM Python SDK MCP Bridge
The LiteLLM Python SDK acts as an MCP bridge so you can use MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP:
- **List** Available MCP Tools: OpenAI clients can view all available MCP tools
- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
@ -26,8 +182,6 @@ LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported mod
- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server
## Usage
### 1. List Available MCP Tools
In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
@ -271,215 +425,3 @@ async with stdio_client(server_params) as (read, write):
</TabItem>
</Tabs>
## Upcoming Features
:::info
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
:::
### Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools.
#### How it works
LiteLLM exposes the following MCP endpoints:
- `/mcp/list_tools` - List all available tools
- `/mcp/call_tool` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate handlers
7. LiteLLM returns the tool call results to the MCP client
#### Usage
#### 1. Define your tools on mcp_tools
LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_tools:
- name: "get_current_time"
description: "Get the current time"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short"]
}
}
}
handler: "mcp_tools.get_current_time"
```
#### 2. Define a handler for your tool
Create a new file called `mcp_tools.py` and add this code. The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called.
```python
# mcp_tools.py
from datetime import datetime
def get_current_time(format: str = "short"):
"""
Simple handler for the 'get_current_time' tool.
Args:
format (str): The format of the time to return ('short').
Returns:
str: The current time formatted as 'HH:MM'.
"""
# Get the current time
current_time = datetime.now()
# Format the time as 'HH:MM'
return current_time.strftime('%H:%M')
```
#### 3. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
Mount your `mcp_tools.py` on the LiteLLM Docker container.
```shell
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
-v $(pwd)/mcp_tools.py:/app/mcp_tools.py \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 4. Make an LLM API request
```python
import asyncio
from langchain_mcp_adapters.tools import load_mcp_tools
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from mcp import ClientSession
from mcp.client.sse import sse_client
async def main():
# Initialize the model with your API key
model = ChatOpenAI(model="gpt-4o")
# Connect to the MCP server
async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
# Initialize the session
print("Initializing session...")
await session.initialize()
print("Session initialized")
# Load available tools from MCP
print("Loading tools...")
tools = await load_mcp_tools(session)
print(f"Loaded {len(tools)} tools")
# Create a ReAct agent with the model and tools
agent = create_react_agent(model, tools)
# Run the agent with a user query
user_query = "What's the weather in Tokyo?"
print(f"Asking: {user_query}")
agent_response = await agent.ainvoke({"messages": user_query})
print("Agent response:")
print(agent_response)
if __name__ == "__main__":
asyncio.run(main())
```
### Specification for `mcp_tools`
The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients.
#### Tool Definition Format
```yaml
mcp_tools:
- name: string # Required: Name of the tool
description: string # Required: Description of what the tool does
input_schema: object # Required: JSON Schema defining the tool's input parameters
handler: string # Required: Path to the function that implements the tool
```
#### Field Details
- `name`: A unique identifier for the tool
- `description`: A clear description of what the tool does, used by LLMs to determine when to call it
- `input_schema`: JSON Schema object defining the expected input parameters
- `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name")
#### Example Tool Definition
```yaml
mcp_tools:
- name: "get_current_time"
description: "Get the current time in a specified format"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short", "long", "iso"]
},
"timezone": {
"type": "string",
"description": "The timezone to use (e.g., 'UTC', 'America/New_York')",
"default": "UTC"
}
},
"required": ["format"]
}
handler: "mcp_tools.get_current_time"
```

View file

@ -664,6 +664,58 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem>
</Tabs>
## Usage - Latency Optimized Inference
Valid from v1.65.1+
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
messages=[{"role": "user", "content": "What is the capital of France?"}],
performanceConfig={"latency": "optimized"},
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: bedrock-claude-3-7
litellm_params:
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
performanceConfig: {"latency": "optimized"} # 👈 EITHER HERE OR ON REQUEST
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "bedrock-claude-3-7",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"performanceConfig": {"latency": "optimized"} # 👈 EITHER HERE OR ON CONFIG.YAML
}'
```
</TabItem>
</Tabs>
## Usage - Bedrock Guardrails
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
@ -1776,6 +1828,7 @@ response = completion(
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
@ -1820,11 +1873,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
```
</TabItem>
</Tabs>
### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable
- Make bedrock completion call
```python
import os
from litellm import completion
@ -1917,12 +1972,46 @@ model_list:
</Tabs>
Text to Image :
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter"
}'
```
Color Guided Generation:
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter",
"taskType": "COLOR_GUIDED_GENERATION",
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
}'
```
| Model Name | Function Call |
|-------------------------|---------------------------------------------|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
### Passing an external BedrockRuntime.Client as a parameter - Completion()
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
:::warning
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
Experimental - 2024-Jun-23:

View file

@ -589,8 +589,10 @@ response = litellm.completion(
"content": [
{"type": "text", "text": "Please summarize the audio."},
{
"type": "image_url",
"image_url": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
"type": "file",
"file": {
"file_data": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
}
},
],
}
@ -640,8 +642,11 @@ response = litellm.completion(
"content": [
{"type": "text", "text": "Please summarize the file."},
{
"type": "image_url",
"image_url": "https://storage..." # 👈 SET THE IMG URL
"type": "file",
"file": {
"file_id": "https://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
},
],
}
@ -668,8 +673,11 @@ response = litellm.completion(
"content": [
{"type": "text", "text": "Please summarize the file."},
{
"type": "image_url",
"image_url": "gs://..." # 👈 SET THE cloud storage bucket url
"type": "file",
"file": {
"file_id": "gs://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
},
],
}

View file

@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
## OpenAI Audio Transcription
LiteLLM supports the OpenAI Audio Transcription endpoint.
Supported models:
| Model Name | Function Call |
|---------------------------|-----------------------------------------------------------------|
| `whisper-1` | `response = completion(model="whisper-1", file=audio_file)` |
| `gpt-4o-transcribe` | `response = completion(model="gpt-4o-transcribe", file=audio_file)` |
| `gpt-4o-mini-transcribe` | `response = completion(model="gpt-4o-mini-transcribe", file=audio_file)` |
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import transcription
import os
# set api keys
os.environ["OPENAI_API_KEY"] = ""
audio_file = open("/path/to/audio.mp3", "rb")
response = transcription(model="gpt-4o-transcribe", file=audio_file)
print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gpt-4o-transcribe
litellm_params:
model: gpt-4o-transcribe
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: audio_transcription
general_settings:
master_key: sk-1234
```
2. Start the proxy
```bash
litellm --config config.yaml
```
3. Test it!
```bash
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
--form 'model="gpt-4o-transcribe"'
```
</TabItem>
</Tabs>
## Advanced
### Getting OpenAI API Response Headers

View file

@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
You can call fine-tuned Vertex AI Gemini models through LiteLLM
| Property | Details |
|----------|---------|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
```python title="Model parameter for calling fine-tuned gemini models"
model="vertex_ai/gemini/<your-finetuned-model>"
```
<Tabs>
<TabItem value="sdk" label="LiteLLM Python SDK">
```python showLineNumbers title="Example"
import litellm
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = litellm.completion(
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy">
1. Add Vertex Credentials to your env
```bash title="Authenticate to Vertex AI"
!gcloud auth application-default login
```
2. Setup config.yaml
```yaml showLineNumbers title="Add to litellm config"
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/gemini/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
```
3. Test it!
<Tabs>
<TabItem value="openai" label="OpenAI Python SDK">
```python showLineNumbers title="Example request"
from openai import OpenAI
client = OpenAI(
api_key="your-litellm-key",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="finetuned-gemini",
messages=[
{"role": "user", "content": "hi"}
]
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```bash showLineNumbers title="Example request"
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Model Garden
:::tip
@ -1479,67 +1576,6 @@ response = completion(
</Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
Fine tuned models on vertex have a numerical model/endpoint id.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = completion(
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Add Vertex Credentials to your env
```bash
!gcloud auth application-default login
```
2. Setup config.yaml
```yaml
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
model_info:
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
```
3. Test it!
```bash
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
## Gemini Pro Vision
| Model Name | Function Call |
@ -1684,23 +1720,25 @@ assert isinstance(
```
## Usage - PDF / Videos / etc. Files
## Usage - PDF / Videos / Audio etc. Files
Pass any file supported by Vertex AI, through LiteLLM.
LiteLLM Supports the following image types passed in url
LiteLLM supports the following file types passed as a URL.
Using `file` message type for VertexAI is live from v1.65.1+
```
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
Files with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
Files with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
Base64 Encoded Local Images
Base64 Encoded Local Files
```
<Tabs>
<TabItem value="sdk" label="SDK">
### **Using `gs://`**
### **Using `gs://` or any URL**
```python
from litellm import completion
@ -1712,8 +1750,11 @@ response = completion(
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF
"type": "file",
"file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL - specify mime-type
}
},
],
}
@ -1747,8 +1788,16 @@ response = completion(
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
}
},
],
}
@ -1794,8 +1843,11 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
"type": "image_url",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF
"type": "file",
"file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL
}
}
}
]
@ -1822,10 +1874,17 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
"type": "image_url",
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
}
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
},
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
}
},
]
}
],
@ -1836,6 +1895,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem>
</Tabs>
## Chat Models
| Model Name | Function Call |
|------------------|--------------------------------------|
@ -2044,7 +2104,12 @@ print(response)
## **Multi-Modal Embeddings**
Usage
Known Limitations:
- Only supports 1 image / video per request
- Only supports GCS or base64 encoded images / videos
### Usage
<Tabs>
<TabItem value="sdk" label="SDK">
@ -2260,6 +2325,115 @@ print(f"Text Embedding: {embeddings.text_embedding}")
</Tabs>
### Text + Image + Video Embeddings
<Tabs>
<TabItem value="sdk" label="SDK">
Text + Image
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"] # will be sent as a gcs image
)
```
Text + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
Image + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM PROXY (Unified Endpoint)">
1. Add model to config.yaml
```yaml
model_list:
- model_name: multimodalembedding@001
litellm_params:
model: vertex_ai/multimodalembedding@001
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
litellm_settings:
drop_params: True
```
2. Start Proxy
```
$ litellm --config /path/to/config.yaml
```
3. Make Request use OpenAI Python SDK, Langchain Python SDK
Text + Image
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"],
)
print(response)
```
Text + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
Image + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
</TabItem>
</Tabs>
## **Image Generation Models**
Usage

View file

@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
- Login: `<your-proxy-base-url>/sso/key/generate`
- Logout: `<your-proxy-base-url>`
Here's the env var to set the logout url on the proxy
```bash
PROXY_LOGOUT_URL="https://www.google.com"
```
#### Step 3. Set `PROXY_BASE_URL` in your .env
Set this in your .env (so the proxy can set the correct redirect url)

View file

@ -160,7 +160,7 @@ general_settings:
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
@ -479,7 +479,7 @@ router_settings:
| PROXY_ADMIN_ID | Admin identifier for proxy server
| PROXY_BASE_URL | Base URL for proxy service
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
| PROXY_MASTER_KEY | Master key for proxy authentication
| LITELLM_MASTER_KEY | Master key for proxy authentication
| QDRANT_API_BASE | Base URL for Qdrant API
| QDRANT_API_KEY | API key for Qdrant service
| QDRANT_URL | Connection URL for Qdrant database
@ -515,4 +515,5 @@ router_settings:
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
| USE_PRISMA_MIGRATE | Flag to use prisma migrate instead of prisma db push. Recommended for production environments.
| WEBHOOK_URL | URL for receiving webhooks from external services
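As a quick sketch, two of the settings above are typically passed to the proxy container as environment variables (placeholder values; `USE_PRISMA_MIGRATE` accepting `"True"` is an assumption based on the flag description):

```bash
# Sketch: supply the master key and the prisma-migrate flag to the proxy container
docker run -d -p 4000:4000 \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -e USE_PRISMA_MIGRATE="True" \
  ghcr.io/berriai/litellm:main-stable
```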

View file

@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability.
When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
**WARNING: Only do this if you're running LiteLLM on VPC, that cannot be accessed from the public internet.**
```yaml
#### Configuration
```yaml showLineNumbers title="litellm config.yaml"
general_settings:
allow_requests_on_db_unavailable: True
```
#### Expected Behavior
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM will handle errors as follows:
| Type of Error | Expected Behavior | Details |
|---------------|-------------------|----------------|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
By default, LiteLLM writes several types of logs to the database:
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
```
### Machine Specifications to Deploy LiteLLM
| Service | Spec | CPUs | Memory | Architecture | Version|
| --- | --- | --- | --- | --- | --- |
| Server | `t2.small`. | `1vCPUs` | `8GB` | `x86` |
| Redis Cache | - | - | - | - | 7.0+ Redis Engine|
### Reference Kubernetes Deployment YAML
Reference Kubernetes `deployment.yaml` that was load tested by us
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: litellm-deployment
spec:
replicas: 3
selector:
matchLabels:
app: litellm
template:
metadata:
labels:
app: litellm
spec:
containers:
- name: litellm-container
image: ghcr.io/berriai/litellm:main-latest
imagePullPolicy: Always
env:
- name: AZURE_API_KEY
value: "d6******"
- name: AZURE_API_BASE
value: "https://ope******"
- name: LITELLM_MASTER_KEY
value: "sk-1234"
- name: DATABASE_URL
value: "po**********"
args:
- "--config"
- "/app/proxy_config.yaml" # Update the path to mount the config file
volumeMounts: # Define volume mount for proxy_config.yaml
- name: config-volume
mountPath: /app
readOnly: true
livenessProbe:
httpGet:
path: /health/liveliness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
readinessProbe:
httpGet:
path: /health/readiness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
volumes: # Define volume to mount proxy_config.yaml
- name: config-volume
configMap:
name: litellm-config
```
Reference Kubernetes `service.yaml` that was load tested by us
```yaml
apiVersion: v1
kind: Service
metadata:
name: litellm-service
spec:
selector:
app: litellm
ports:
- protocol: TCP
port: 4000
targetPort: 4000
type: LoadBalancer
```

View file

@ -188,7 +188,13 @@ Currently implemented for:
- OpenAI (if OPENAI_API_KEY is set)
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
- Gemini (if GEMINI_API_KEY is set)
- XAI (if XAI_API_KEY is set)
- Anthropic (if ANTHROPIC_API_KEY is set)
You can also specify a custom provider to check:
**All providers**:
```python
from litellm import get_valid_models
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
print(valid_models)
```
**Specific provider**:
```python
from litellm import get_valid_models
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
print(valid_models)
```
### `validate_environment(model: str)`
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
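A quick sketch of how it's typically called:
```python
from litellm import validate_environment

env_check = validate_environment(model="gpt-3.5-turbo")
print(env_check)
# e.g. {'keys_in_environment': False, 'missing_keys': ['OPENAI_API_KEY']}
```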

View file

@ -98,6 +98,5 @@ On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
## Additional Resources
- [Running LiteLLM and OpenWebUI on Windows Localhost: A Comprehensive Guide](https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/)

(Five binary image files added in this commit - contents not shown. Sizes: 133 KiB, 93 KiB, 237 KiB, 70 KiB, 66 KiB.)

View file

@ -24,6 +24,7 @@ This release brings:
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
- Perf improvements for Usage-based Routing
- Streaming guardrail support via websockets
- Azure OpenAI client perf fix (from previous release)
## Docker Run LiteLLM Proxy
@ -31,7 +32,7 @@ This release brings:
docker run
-e STORE_MODEL_IN_DB=True
-p 4000:4000
ghcr.io/berriai/litellm:main-v1.63.14-stable
ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
```
## Demo Instance

View file

@ -0,0 +1,160 @@
---
title: v1.65.0-stable - Model Context Protocol
slug: v1.65.0-stable
date: 2025-03-30T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [mcp, custom_prompt_management]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0-stable is live now. Here are the key highlights of this release:
- **MCP Support**: Support for adding and using MCP servers on the LiteLLM proxy.
- **UI view total usage after 1M+ logs**: You can now view usage analytics after crossing 1M+ logs in DB.
## Model Context Protocol (MCP)
This release introduces support for centrally adding MCP servers on LiteLLM. This lets you add MCP server endpoints, so your developers can `list` and `call` MCP tools through LiteLLM.
Read more about MCP [here](https://docs.litellm.ai/docs/mcp).
<Image
img={require('../../img/release_notes/mcp_ui.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
Expose and use MCP servers through LiteLLM
</p>
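For a rough idea of the developer-side flow, here's a minimal sketch. The proxy URL, the `/mcp/` SSE path, and the `experimental_mcp_client` helpers are assumptions based on this release's diff and the MCP docs linked above - treat the docs as authoritative:
```python
# Sketch only - the /mcp/ SSE path and helper names are assumptions, see https://docs.litellm.ai/docs/mcp
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client

from litellm import experimental_mcp_client


async def main():
    # Connect to the MCP endpoint assumed to be exposed by the LiteLLM proxy
    async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # List the centrally-added MCP tools in OpenAI tool format
            tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai")
            print(tools)


asyncio.run(main())
```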
## UI view total usage after 1M+ logs
This release brings the ability to view total usage analytics even after exceeding 1M+ logs in your database. We've implemented a scalable architecture that stores only aggregate usage data, resulting in significantly more efficient queries and reduced database CPU utilization.
<Image
img={require('../../img/release_notes/ui_usage.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
View total usage after 1M+ logs
</p>
- How this works:
  - We now aggregate usage data into a dedicated DailyUserSpend table, significantly reducing query load and CPU usage even beyond 1M+ logs.
- Daily Spend Breakdown API:
  - Retrieve granular daily usage data (by model, provider, and API key) with a single endpoint.
Example Request:
```shell title="Daily Spend Breakdown API" showLineNumbers
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
-H 'Authorization: Bearer sk-...'
```
```json title="Daily Spend Breakdown API Response" showLineNumbers
{
  "results": [
    {
      "date": "2025-03-27",
      "metrics": {
        "spend": 0.0177072,
        "prompt_tokens": 111,
        "completion_tokens": 1711,
        "total_tokens": 1822,
        "api_requests": 11
      },
      "breakdown": {
        "models": {
          "gpt-4o-mini": {
            "spend": 1.095e-05,
            "prompt_tokens": 37,
            "completion_tokens": 9,
            "total_tokens": 46,
            "api_requests": 1
          }
        },
        "providers": { "openai": { ... }, "azure_ai": { ... } },
        "api_keys": { "3126b6eaf1...": { ... } }
      }
    }
  ],
  "metadata": {
    "total_spend": 0.7274667,
    "total_prompt_tokens": 280990,
    "total_completion_tokens": 376674,
    "total_api_requests": 14
  }
}
```
## New Models / Updated Models
- Support for Vertex AI gemini-2.0-flash-lite & Google AI Studio gemini-2.0-flash-lite [PR](https://github.com/BerriAI/litellm/pull/9523)
- Support for Vertex AI Fine-Tuned LLMs [PR](https://github.com/BerriAI/litellm/pull/9542)
- Nova Canvas image generation support [PR](https://github.com/BerriAI/litellm/pull/9525)
- OpenAI gpt-4o-transcribe support [PR](https://github.com/BerriAI/litellm/pull/9517)
- Added new Vertex AI text embedding model [PR](https://github.com/BerriAI/litellm/pull/9476)
## LLM Translation
- OpenAI Web Search Tool Call Support [PR](https://github.com/BerriAI/litellm/pull/9465)
- Vertex AI topLogprobs support [PR](https://github.com/BerriAI/litellm/pull/9518)
- Support for sending images and video to Vertex AI multimodal embedding [Doc](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings)
- Support litellm.api_base for Vertex AI + Gemini across completion, embedding, image_generation [PR](https://github.com/BerriAI/litellm/pull/9516)
- Bug fix for returning `response_cost` when using litellm python SDK with LiteLLM Proxy [PR](https://github.com/BerriAI/litellm/commit/6fd18651d129d606182ff4b980e95768fc43ca3d)
- Support for `max_completion_tokens` on Mistral API [PR](https://github.com/BerriAI/litellm/pull/9606)
- Refactored Vertex AI passthrough routes - fixes unpredictable behaviour with auto-setting default_vertex_region on router model add [PR](https://github.com/BerriAI/litellm/pull/9467)
## Spend Tracking Improvements
- Log 'api_base' on spend logs [PR](https://github.com/BerriAI/litellm/pull/9509)
- Support for Gemini audio token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
- Fixed OpenAI audio input token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
## UI
### Model Management
- Allowed team admins to add/update/delete models on UI [PR](https://github.com/BerriAI/litellm/pull/9572)
- Render `supports_web_search` on the model hub [PR](https://github.com/BerriAI/litellm/pull/9469)
### Request Logs
- Show API base and model ID on request logs [PR](https://github.com/BerriAI/litellm/pull/9572)
- Allow viewing key info on request logs [PR](https://github.com/BerriAI/litellm/pull/9568)
### Usage Tab
- Added Daily User Spend Aggregate view - allows the UI Usage tab to work beyond 1M rows [PR](https://github.com/BerriAI/litellm/pull/9538)
- Connected UI to "LiteLLM_DailyUserSpend" spend table [PR](https://github.com/BerriAI/litellm/pull/9603)
## Logging Integrations
- Fixed StandardLoggingPayload for GCS Pub Sub Logging Integration [PR](https://github.com/BerriAI/litellm/pull/9508)
- Track `litellm_model_name` on `StandardLoggingPayload` [Docs](https://docs.litellm.ai/docs/proxy/logging_spec#standardlogginghiddenparams)
## Performance / Reliability Improvements
- LiteLLM Redis semantic caching implementation [PR](https://github.com/BerriAI/litellm/pull/9356)
- Gracefully handle exceptions when the DB is having an outage [PR](https://github.com/BerriAI/litellm/pull/9533)
- Allow pods to start up and pass `/health/readiness` when `allow_requests_on_db_unavailable: True` and the DB is down [PR](https://github.com/BerriAI/litellm/pull/9569)
## General Improvements
- Support for exposing MCP tools on litellm proxy [PR](https://github.com/BerriAI/litellm/pull/9426)
- Support discovering Gemini, Anthropic, xAI models by calling their `/v1/models` endpoint [PR](https://github.com/BerriAI/litellm/pull/9530)
- Fixed route check for non-proxy admins on JWT auth [PR](https://github.com/BerriAI/litellm/pull/9454)
- Added baseline Prisma database migrations [PR](https://github.com/BerriAI/litellm/pull/9565)
- View all wildcard models on /model/info [PR](https://github.com/BerriAI/litellm/pull/9572)
## Security
- Bumped next from 14.2.21 to 14.2.25 in UI dashboard [PR](https://github.com/BerriAI/litellm/pull/9458)
## Complete Git Diff
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.14-stable.patch1...v1.65.0-stable)

View file

@ -0,0 +1,34 @@
---
title: v1.65.0 - Team Model Add - update
slug: v1.65.0
date: 2025-03-28T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [management endpoints, team models, ui]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
This means that only proxy admins or team admins can create team models.
## Additional Changes
- Allows team admins to call `/model/update` to update team models.
- Allows team admins to call `/model/delete` to delete team models.
- Introduces a new `user_models_only` param on `/v2/model/info` - returns only models added by this user (see the example below).
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
<Image img={require('../../img/release_notes/team_model_add.png')} />
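A rough sketch of the new param in use (the `Authorization` header follows the proxy's usual key format; the exact response shape may differ):
```python
import requests  # sketch only - any HTTP client works

resp = requests.get(
    "http://localhost:4000/v2/model/info",
    params={"user_models_only": "true"},
    headers={"Authorization": "Bearer sk-1234"},  # your proxy / team-admin key
)
print(resp.json())
```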

View file

@ -304,7 +304,6 @@ const sidebars = {
"image_variations",
]
},
"mcp",
{
type: "category",
label: "/audio",

View file

@ -444,9 +444,7 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
detected_secrets = []
for file in secrets.files:
for found_secret in secrets[file]:
if found_secret.secret_value is None:
continue
detected_secrets.append(
@ -471,14 +469,12 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
):
if await self.should_run_check(user_api_key_dict) is False:
return
if "messages" in data and isinstance(data["messages"], list):
for message in data["messages"]:
if "content" in message and isinstance(message["content"], str):
detected_secrets = self.scan_message_for_secrets(message["content"])
for secret in detected_secrets:

View file

@ -0,0 +1,26 @@
Portions of this software are licensed as follows:
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
---
MIT License
Copyright (c) 2023 Berri AI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,21 @@
Additional files for the proxy. Reduces the size of the main litellm package.
Currently, only stores the migration.sql files for litellm-proxy.
To install, run:
```bash
pip install litellm-proxy-extras
```
OR
```bash
pip install litellm[proxy] # installs litellm-proxy-extras and other proxy dependencies.
```
To use the migrations, run:
```bash
litellm --use_prisma_migrate
```

(Two binary files added in this commit - contents not shown.)

View file

@ -0,0 +1,12 @@
import logging
# Set up package logger
logger = logging.getLogger("litellm_proxy_extras")
if not logger.handlers: # Only add handler if none exists
handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

View file

@ -0,0 +1,360 @@
-- CreateTable
CREATE TABLE "LiteLLM_BudgetTable" (
"budget_id" TEXT NOT NULL,
"max_budget" DOUBLE PRECISION,
"soft_budget" DOUBLE PRECISION,
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"model_max_budget" JSONB,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_CredentialsTable" (
"credential_id" TEXT NOT NULL,
"credential_name" TEXT NOT NULL,
"credential_values" JSONB NOT NULL,
"credential_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ProxyModelTable" (
"model_id" TEXT NOT NULL,
"model_name" TEXT NOT NULL,
"litellm_params" JSONB NOT NULL,
"model_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationTable" (
"organization_id" TEXT NOT NULL,
"organization_alias" TEXT NOT NULL,
"budget_id" TEXT NOT NULL,
"metadata" JSONB NOT NULL DEFAULT '{}',
"models" TEXT[],
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ModelTable" (
"id" SERIAL NOT NULL,
"aliases" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamTable" (
"team_id" TEXT NOT NULL,
"team_alias" TEXT,
"organization_id" TEXT,
"admins" TEXT[],
"members" TEXT[],
"members_with_roles" JSONB NOT NULL DEFAULT '{}',
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"models" TEXT[],
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"blocked" BOOLEAN NOT NULL DEFAULT false,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"model_id" INTEGER,
CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserTable" (
"user_id" TEXT NOT NULL,
"user_alias" TEXT,
"team_id" TEXT,
"sso_user_id" TEXT,
"organization_id" TEXT,
"password" TEXT,
"teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
"user_role" TEXT,
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"user_email" TEXT,
"models" TEXT[],
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_VerificationToken" (
"token" TEXT NOT NULL,
"key_name" TEXT,
"key_alias" TEXT,
"soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"expires" TIMESTAMP(3),
"models" TEXT[],
"aliases" JSONB NOT NULL DEFAULT '{}',
"config" JSONB NOT NULL DEFAULT '{}',
"user_id" TEXT,
"team_id" TEXT,
"permissions" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"metadata" JSONB NOT NULL DEFAULT '{}',
"blocked" BOOLEAN,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"max_budget" DOUBLE PRECISION,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"budget_id" TEXT,
"organization_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT,
CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
);
-- CreateTable
CREATE TABLE "LiteLLM_EndUserTable" (
"user_id" TEXT NOT NULL,
"alias" TEXT,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"allowed_model_region" TEXT,
"default_model" TEXT,
"budget_id" TEXT,
"blocked" BOOLEAN NOT NULL DEFAULT false,
CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_Config" (
"param_name" TEXT NOT NULL,
"param_value" JSONB,
CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
);
-- CreateTable
CREATE TABLE "LiteLLM_SpendLogs" (
"request_id" TEXT NOT NULL,
"call_type" TEXT NOT NULL,
"api_key" TEXT NOT NULL DEFAULT '',
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"total_tokens" INTEGER NOT NULL DEFAULT 0,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"completionStartTime" TIMESTAMP(3),
"model" TEXT NOT NULL DEFAULT '',
"model_id" TEXT DEFAULT '',
"model_group" TEXT DEFAULT '',
"custom_llm_provider" TEXT DEFAULT '',
"api_base" TEXT DEFAULT '',
"user" TEXT DEFAULT '',
"metadata" JSONB DEFAULT '{}',
"cache_hit" TEXT DEFAULT '',
"cache_key" TEXT DEFAULT '',
"request_tags" JSONB DEFAULT '[]',
"team_id" TEXT,
"end_user" TEXT,
"requester_ip_address" TEXT,
"messages" JSONB DEFAULT '{}',
"response" JSONB DEFAULT '{}',
CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ErrorLogs" (
"request_id" TEXT NOT NULL,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"api_base" TEXT NOT NULL DEFAULT '',
"model_group" TEXT NOT NULL DEFAULT '',
"litellm_model_name" TEXT NOT NULL DEFAULT '',
"model_id" TEXT NOT NULL DEFAULT '',
"request_kwargs" JSONB NOT NULL DEFAULT '{}',
"exception_type" TEXT NOT NULL DEFAULT '',
"exception_string" TEXT NOT NULL DEFAULT '',
"status_code" TEXT NOT NULL DEFAULT '',
CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserNotifications" (
"request_id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"models" TEXT[],
"justification" TEXT NOT NULL,
"status" TEXT NOT NULL,
CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamMembership" (
"user_id" TEXT NOT NULL,
"team_id" TEXT NOT NULL,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"budget_id" TEXT,
CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationMembership" (
"user_id" TEXT NOT NULL,
"organization_id" TEXT NOT NULL,
"user_role" TEXT,
"spend" DOUBLE PRECISION DEFAULT 0.0,
"budget_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_InvitationLink" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"is_accepted" BOOLEAN NOT NULL DEFAULT false,
"accepted_at" TIMESTAMP(3),
"expires_at" TIMESTAMP(3) NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_AuditLog" (
"id" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"changed_by" TEXT NOT NULL DEFAULT '',
"changed_by_api_key" TEXT NOT NULL DEFAULT '',
"action" TEXT NOT NULL,
"table_name" TEXT NOT NULL,
"object_id" TEXT NOT NULL,
"before_value" JSONB,
"updated_values" JSONB,
CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

View file

@ -0,0 +1,33 @@
-- CreateTable
CREATE TABLE "LiteLLM_DailyUserSpend" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"date" TEXT NOT NULL,
"api_key" TEXT NOT NULL,
"model" TEXT NOT NULL,
"model_group" TEXT,
"custom_llm_provider" TEXT,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");

View file

@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;

View file

@ -0,0 +1,14 @@
-- CreateEnum
CREATE TYPE "JobStatus" AS ENUM ('ACTIVE', 'INACTIVE');
-- CreateTable
CREATE TABLE "LiteLLM_CronJob" (
"cronjob_id" TEXT NOT NULL,
"pod_id" TEXT NOT NULL,
"status" "JobStatus" NOT NULL DEFAULT 'INACTIVE',
"last_updated" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"ttl" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_CronJob_pkey" PRIMARY KEY ("cronjob_id")
);

View file

@ -0,0 +1 @@
provider = "postgresql"

View file

@ -0,0 +1,80 @@
import os
import random
import subprocess
import time
from typing import Optional
from litellm_proxy_extras._logging import logger
def str_to_bool(value: Optional[str]) -> bool:
if value is None:
return False
return value.lower() in ("true", "1", "t", "y", "yes")
class ProxyExtrasDBManager:
@staticmethod
def setup_database(schema_path: str, use_migrate: bool = False) -> bool:
"""
Set up the database using either prisma migrate or prisma db push
Uses migrations from litellm-proxy-extras package
Args:
schema_path (str): Path to the Prisma schema file
use_migrate (bool): Whether to use prisma migrate instead of db push
Returns:
bool: True if setup was successful, False otherwise
"""
use_migrate = str_to_bool(os.getenv("USE_PRISMA_MIGRATE")) or use_migrate
for attempt in range(4):
original_dir = os.getcwd()
schema_dir = os.path.dirname(schema_path)
os.chdir(schema_dir)
try:
if use_migrate:
logger.info("Running prisma migrate deploy")
try:
# Set migrations directory for Prisma
subprocess.run(
["prisma", "migrate", "deploy"],
timeout=60,
check=True,
capture_output=True,
text=True,
)
logger.info("prisma migrate deploy completed")
return True
except subprocess.CalledProcessError as e:
logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
if (
"P3005" in e.stderr
and "database schema is not empty" in e.stderr
):
logger.info("Error: Database schema is not empty")
return False
else:
# Use prisma db push with increased timeout
subprocess.run(
["prisma", "db", "push", "--accept-data-loss"],
timeout=60,
check=True,
)
return True
except subprocess.TimeoutExpired:
logger.info(f"Attempt {attempt + 1} timed out")
time.sleep(random.randrange(5, 15))
except subprocess.CalledProcessError as e:
attempts_left = 3 - attempt
retry_msg = (
f" Retrying... ({attempts_left} attempts left)"
if attempts_left > 0
else ""
)
logger.info(f"The process failed to execute. Details: {e}.{retry_msg}")
time.sleep(random.randrange(5, 15))
finally:
os.chdir(original_dir)
return False
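A minimal usage sketch of the manager above. The module path and schema location are assumptions made for illustration - adjust them to your install:
```python
# Sketch only - module path and schema path are assumptions, not confirmed by this diff
from litellm_proxy_extras.utils import ProxyExtrasDBManager

# use_migrate=True runs `prisma migrate deploy`; otherwise it falls back to `prisma db push`
ok = ProxyExtrasDBManager.setup_database(
    schema_path="litellm/proxy/schema.prisma",
    use_migrate=True,
)
print("database ready:", ok)
```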

View file

@ -0,0 +1,30 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.1.1"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
[tool.poetry.urls]
homepage = "https://litellm.ai"
Homepage = "https://litellm.ai"
repository = "https://github.com/BerriAI/litellm"
Repository = "https://github.com/BerriAI/litellm"
documentation = "https://docs.litellm.ai"
Documentation = "https://docs.litellm.ai"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0, !=3.9.7"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.1.1"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",
"../pyproject.toml:litellm-proxy-extras = {version = \""
]

View file

View file

@ -2,7 +2,7 @@
import warnings
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
### INIT VARIABLES ##########
### INIT VARIABLES ###########
import threading
import os
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
@ -122,19 +122,19 @@ langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = (
False # if you want to use v1 gcs pubsub logged payload
)
gcs_pub_sub_use_v1: Optional[
bool
] = False # if you want to use v1 gcs pubsub logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_input_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
@ -142,18 +142,18 @@ log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[bool] = (
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
)
add_user_information_to_llm_headers: Optional[
bool
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks #############
email: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
token: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
email: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens = 256 # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
@ -229,24 +229,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k'
)
caching: bool = (
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
caching_with_models: bool = (
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
cache: Optional[Cache] = (
None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
Cache
] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = (
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
)
budget_duration: Optional[
str
] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = (
50.0 # by default all litellm proxy keys have a soft budget of 50.0
)
@ -255,15 +251,11 @@ forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = (
False # if function calling not supported by api, append function call details to system prompt
)
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = (
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
)
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
@ -285,9 +277,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = []
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
force_ipv4: bool = (
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient"
)
@ -301,13 +291,13 @@ fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3
num_retries_per_request: Optional[int] = (
None # for the request overall (incl. fallbacks + model retries)
)
num_retries_per_request: Optional[
int
] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = (
None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
)
secret_manager_client: Optional[
Any
] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
_key_management_settings: KeyManagementSettings = KeyManagementSettings()
@ -813,6 +803,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
from .llms.maritalk import MaritalkConfig
from .llms.openrouter.chat.transformation import OpenrouterConfig
from .llms.anthropic.chat.transformation import AnthropicConfig
from .llms.anthropic.common_utils import AnthropicModelInfo
from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion.transformation import AnthropicTextConfig
from .llms.triton.completion.transformation import TritonConfig
@ -848,6 +839,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
VertexGeminiConfig as VertexAIConfig,
)
from .llms.gemini.common_utils import GeminiModelInfo
from .llms.gemini.chat.transformation import (
GoogleAIStudioGeminiConfig,
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
@ -950,6 +942,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
from .llms.openai.chat.gpt_transformation import (
OpenAIGPTConfig,
)
from .llms.openai.transcriptions.whisper_transformation import (
OpenAIWhisperAudioTranscriptionConfig,
)
from .llms.openai.transcriptions.gpt_transformation import (
OpenAIGPTAudioTranscriptionConfig,
)
openAIGPTConfig = OpenAIGPTConfig()
from .llms.openai.chat.gpt_audio_transformation import (
@ -978,6 +976,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
from .llms.friendliai.chat.transformation import FriendliaiChatConfig
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
from .llms.xai.chat.transformation import XAIChatConfig
from .llms.xai.common_utils import XAIModelInfo
from .llms.volcengine import VolcEngineConfig
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
from .llms.azure.azure import (
@ -1047,10 +1046,10 @@ from .types.llms.custom_llm import CustomLLMItem
from .types.utils import GenericStreamingChunk
custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = (
[]
) # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[bool] = (
None # disable huggingface tokenizer download. Defaults to openai clk100
)
_custom_providers: List[
str
] = [] # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[
bool
] = None # disable huggingface tokenizer download. Defaults to openai clk100
global_disable_no_log_param: bool = False

View file

@ -1,6 +1,7 @@
import json
import logging
import os
import sys
from datetime import datetime
from logging import Formatter
@ -40,9 +41,56 @@ class JsonFormatter(Formatter):
return json.dumps(json_record)
# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
# Create a handler with JSON formatting for exceptions
error_handler = logging.StreamHandler()
error_handler.setFormatter(formatter)
# Setup excepthook for uncaught exceptions
def json_excepthook(exc_type, exc_value, exc_traceback):
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exc_value),
args=(),
exc_info=(exc_type, exc_value, exc_traceback),
)
error_handler.handle(record)
sys.excepthook = json_excepthook
# Configure asyncio exception handler if possible
try:
import asyncio
def async_json_exception_handler(loop, context):
exception = context.get("exception")
if exception:
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exception),
args=(),
exc_info=None,
)
error_handler.handle(record)
else:
loop.default_exception_handler(context)
asyncio.get_event_loop().set_exception_handler(async_json_exception_handler)
except Exception:
pass
# Create a formatter and set it for the handler
if json_logs:
handler.setFormatter(JsonFormatter())
_setup_json_exception_handlers(JsonFormatter())
else:
formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
@ -65,18 +113,24 @@ def _turn_on_json():
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
# Define a list of the loggers to update
loggers = [verbose_router_logger, verbose_proxy_logger, verbose_logger]
# Define all loggers to update, including root logger
loggers = [logging.getLogger()] + [
verbose_router_logger,
verbose_proxy_logger,
verbose_logger,
]
# Iterate through each logger and update its handlers
for logger in loggers:
# Remove all existing handlers
for h in logger.handlers[:]:
logger.removeHandler(h)
# Add the new handler
logger.addHandler(handler)
# Set up exception handlers
_setup_json_exception_handlers(JsonFormatter())
def _turn_on_debug():
verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug

View file

@ -202,6 +202,7 @@ def init_redis_cluster(redis_kwargs) -> redis.RedisCluster:
def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
sentinel_nodes = redis_kwargs.get("sentinel_nodes")
sentinel_password = redis_kwargs.get("sentinel_password")
service_name = redis_kwargs.get("service_name")
if not sentinel_nodes or not service_name:
@ -212,7 +213,11 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")
# Set up the Sentinel client
sentinel = redis.Sentinel(sentinel_nodes, socket_timeout=0.1)
sentinel = redis.Sentinel(
sentinel_nodes,
socket_timeout=0.1,
password=sentinel_password,
)
# Return the master instance for the given service

View file

@ -15,7 +15,7 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
OTELClass = OpenTelemetry
else:
Span = Any

View file

@ -153,7 +153,6 @@ def create_batch(
)
api_base: Optional[str] = None
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
@ -358,7 +357,6 @@ def retrieve_batch(
_is_async = kwargs.pop("aretrieve_batch", False) is True
api_base: Optional[str] = None
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base

View file

@ -9,12 +9,12 @@ Has 4 methods:
"""
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Optional, Union
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -66,9 +66,7 @@ class CachingHandlerResponse(BaseModel):
cached_result: Optional[Any] = None
final_embedding_cached_response: Optional[EmbeddingResponse] = None
embedding_all_elements_cache_hit: bool = (
False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
)
embedding_all_elements_cache_hit: bool = False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
class LLMCachingHandler:
@ -738,7 +736,6 @@ class LLMCachingHandler:
if self._should_store_result_in_cache(
original_function=self.original_function, kwargs=new_kwargs
):
litellm.cache.add_cache(result, **new_kwargs)
return
@ -865,9 +862,9 @@ class LLMCachingHandler:
}
if litellm.cache is not None:
litellm_params["preset_cache_key"] = (
litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
)
litellm_params[
"preset_cache_key"
] = litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
else:
litellm_params["preset_cache_key"] = None

View file

@ -1,12 +1,12 @@
import json
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Optional, Union
from .base_cache import BaseCache
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -12,7 +12,7 @@ import asyncio
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional
from typing import TYPE_CHECKING, Any, List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger
@ -24,7 +24,7 @@ from .redis_cache import RedisCache
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -8,7 +8,6 @@ from .in_memory_cache import InMemoryCache
class LLMClientCache(InMemoryCache):
def update_cache_key_with_event_loop(self, key):
"""
Add the event loop to the cache key, to prevent event loop closed errors.

View file

@ -34,7 +34,7 @@ if TYPE_CHECKING:
cluster_pipeline = ClusterPipeline
async_redis_client = Redis
async_redis_cluster_client = RedisCluster
Span = _Span
Span = Union[_Span, Any]
else:
pipeline = Any
cluster_pipeline = Any
@ -57,7 +57,6 @@ class RedisCache(BaseCache):
socket_timeout: Optional[float] = 5.0, # default 5 second timeout
**kwargs,
):
from litellm._service_logger import ServiceLogging
from .._redis import get_redis_client, get_redis_connection_pool
@ -1045,3 +1044,109 @@ class RedisCache(BaseCache):
except Exception as e:
verbose_logger.debug(f"Redis TTL Error: {e}")
return None
async def async_rpush(
self,
key: str,
values: List[Any],
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> int:
"""
Append one or multiple values to a list stored at key
Args:
key: The Redis key of the list
values: One or more values to append to the list
parent_otel_span: Optional parent OpenTelemetry span
Returns:
int: The length of the list after the push operation
"""
_redis_client: Any = self.init_async_client()
start_time = time.time()
try:
response = await _redis_client.rpush(key, *values)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_rpush",
)
)
return response
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_rpush",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache RPUSH: - Got exception from REDIS : {str(e)}"
)
raise e
async def async_lpop(
self,
key: str,
count: Optional[int] = None,
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> Union[Any, List[Any]]:
_redis_client: Any = self.init_async_client()
start_time = time.time()
print_verbose(f"LPOP from Redis list: key: {key}, count: {count}")
try:
result = await _redis_client.lpop(key, count)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_lpop",
)
)
# Handle result parsing if needed
if isinstance(result, bytes):
try:
return result.decode("utf-8")
except Exception:
return result
elif isinstance(result, list) and all(
isinstance(item, bytes) for item in result
):
try:
return [item.decode("utf-8") for item in result]
except Exception:
return result
return result
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_lpop",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache LPOP: - Got exception from REDIS : {str(e)}"
)
raise e

View file

@ -5,7 +5,7 @@ Key differences:
- RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created
"""
from typing import TYPE_CHECKING, Any, List, Optional
from typing import TYPE_CHECKING, Any, List, Optional, Union
from litellm.caching.redis_cache import RedisCache
@ -16,7 +16,7 @@ if TYPE_CHECKING:
pipeline = Pipeline
async_redis_client = Redis
Span = _Span
Span = Union[_Span, Any]
else:
pipeline = Any
async_redis_client = Any

View file

@ -13,11 +13,15 @@ import ast
import asyncio
import json
import os
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, cast
import litellm
from litellm._logging import print_verbose
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_str_from_messages,
)
from litellm.types.utils import EmbeddingResponse
from .base_cache import BaseCache
@ -87,14 +91,16 @@ class RedisSemanticCache(BaseCache):
if redis_url is None:
try:
# Attempt to use provided parameters or fallback to environment variables
host = host or os.environ['REDIS_HOST']
port = port or os.environ['REDIS_PORT']
password = password or os.environ['REDIS_PASSWORD']
host = host or os.environ["REDIS_HOST"]
port = port or os.environ["REDIS_PORT"]
password = password or os.environ["REDIS_PASSWORD"]
except KeyError as e:
# Raise a more informative exception if any of the required keys are missing
missing_var = e.args[0]
raise ValueError(f"Missing required Redis configuration: {missing_var}. "
f"Provide {missing_var} or redis_url.") from e
raise ValueError(
f"Missing required Redis configuration: {missing_var}. "
f"Provide {missing_var} or redis_url."
) from e
redis_url = f"redis://:{password}@{host}:{port}"
@ -137,10 +143,13 @@ class RedisSemanticCache(BaseCache):
List[float]: The embedding vector
"""
# Create an embedding from prompt
embedding_response = litellm.embedding(
embedding_response = cast(
EmbeddingResponse,
litellm.embedding(
model=self.embedding_model,
input=prompt,
cache={"no-store": True, "no-cache": True},
),
)
embedding = embedding_response["data"][0]["embedding"]
return embedding
@ -186,6 +195,7 @@ class RedisSemanticCache(BaseCache):
"""
print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
value_str: Optional[str] = None
try:
# Extract the prompt from messages
messages = kwargs.get("messages", [])
@ -203,7 +213,9 @@ class RedisSemanticCache(BaseCache):
else:
self.llmcache.store(prompt, value_str)
except Exception as e:
print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}")
print_verbose(
f"Error setting {value_str or value} in the Redis semantic cache: {str(e)}"
)
def get_cache(self, key: str, **kwargs) -> Any:
"""
@ -336,13 +348,13 @@ class RedisSemanticCache(BaseCache):
prompt,
value_str,
vector=prompt_embedding, # Pass through custom embedding
ttl=ttl
ttl=ttl,
)
else:
await self.llmcache.astore(
prompt,
value_str,
vector=prompt_embedding # Pass through custom embedding
vector=prompt_embedding, # Pass through custom embedding
)
except Exception as e:
print_verbose(f"Error in async_set_cache: {str(e)}")
@ -374,14 +386,13 @@ class RedisSemanticCache(BaseCache):
prompt_embedding = await self._get_async_embedding(prompt, **kwargs)
# Check the cache for semantically similar prompts
results = await self.llmcache.acheck(
prompt=prompt,
vector=prompt_embedding
)
results = await self.llmcache.acheck(prompt=prompt, vector=prompt_embedding)
# handle results / cache hit
if not results:
kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 # TODO why here but not above??
kwargs.setdefault("metadata", {})[
"semantic-similarity"
] = 0.0 # TODO why here but not above??
return None
cache_hit = results[0]
@ -420,7 +431,9 @@ class RedisSemanticCache(BaseCache):
aindex = await self.llmcache._get_async_index()
return await aindex.info()
async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None:
async def async_set_cache_pipeline(
self, cache_list: List[Tuple[str, Any]], **kwargs
) -> None:
"""
Asynchronously store multiple values in the semantic cache.

View file

@ -123,7 +123,7 @@ class S3Cache(BaseCache):
) # Convert string to dictionary
except Exception:
cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict:
if not isinstance(cached_response, dict):
cached_response = dict(cached_response)
verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"

View file

@ -4,9 +4,11 @@ ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2
DEFAULT_MAX_RECURSE_DEPTH = 10
DEFAULT_FAILURE_THRESHOLD_PERCENT = (
0.5 # default cooldown a deployment if 50% of requests fail in a given minute
)
DEFAULT_MAX_TOKENS = 4096
DEFAULT_REDIS_SYNC_INTERVAL = 1
DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5
@ -16,6 +18,8 @@ DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
@ -414,6 +418,7 @@ RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" # default tool name used when conv
########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
MCP_TOOL_NAME_PREFIX = "mcp_tool"
########################### LiteLLM Proxy Specific Constants ###########################
########################################################################################
@ -441,3 +446,7 @@ HEALTH_CHECK_TIMEOUT_SECONDS = 60 # 60 seconds
UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
LITELLM_PROXY_ADMIN_NAME = "default_user_id"
########################### DB CRON JOB NAMES ###########################
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute

View file

@ -2,7 +2,7 @@
## File for 'response_cost' calculation in Logging
import time
from functools import lru_cache
from typing import Any, List, Literal, Optional, Tuple, Union
from typing import Any, List, Literal, Optional, Tuple, Union, cast
from pydantic import BaseModel
@ -275,15 +275,13 @@ def cost_per_token( # noqa: PLR0915
custom_llm_provider=custom_llm_provider,
prompt_characters=prompt_characters,
completion_characters=completion_characters,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
usage=usage_block,
)
elif cost_router == "cost_per_token":
return google_cost_per_token(
model=model_without_prefix,
custom_llm_provider=custom_llm_provider,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
usage=usage_block,
)
elif custom_llm_provider == "anthropic":
return anthropic_cost_per_token(model=model, usage=usage_block)
@ -464,13 +462,36 @@ def _model_contains_known_llm_provider(model: str) -> bool:
def _get_usage_object(
completion_response: Any,
) -> Optional[Usage]:
usage_obj: Optional[Usage] = None
if completion_response is not None and isinstance(
completion_response, ModelResponse
):
usage_obj = completion_response.get("usage")
usage_obj = cast(
Union[Usage, ResponseAPIUsage, dict, BaseModel],
(
completion_response.get("usage")
if isinstance(completion_response, dict)
else getattr(completion_response, "get", lambda x: None)("usage")
),
)
if usage_obj is None:
return None
if isinstance(usage_obj, Usage):
return usage_obj
elif (
usage_obj is not None
and (isinstance(usage_obj, dict) or isinstance(usage_obj, ResponseAPIUsage))
and ResponseAPILoggingUtils._is_response_api_usage(usage_obj)
):
return ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage_obj
)
elif isinstance(usage_obj, dict):
return Usage(**usage_obj)
elif isinstance(usage_obj, BaseModel):
return Usage(**usage_obj.model_dump())
else:
verbose_logger.debug(
f"Unknown usage object type: {type(usage_obj)}, usage_obj: {usage_obj}"
)
return None
def _is_known_usage_objects(usage_obj):
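A hedged sketch of the dict branch the reworked helper now covers; it assumes litellm.types.utils.Usage accepts these keyword fields, as it does elsewhere in this diff, and the token counts are illustrative:

    from litellm.types.utils import Usage

    raw_usage = {"prompt_tokens": 12, "completion_tokens": 7, "total_tokens": 19}
    usage = Usage(**raw_usage)   # the isinstance(usage_obj, dict) branch above
    assert usage.total_tokens == 19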
@ -559,7 +580,6 @@ def completion_cost( # noqa: PLR0915
- For un-mapped Replicate models, the cost is calculated based on the total time used for the request.
"""
try:
call_type = _infer_call_type(call_type, completion_response) or "completion"
if (
@ -664,6 +684,7 @@ def completion_cost( # noqa: PLR0915
elif len(prompt) > 0:
prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion)
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
@ -828,11 +849,14 @@ def get_response_cost_from_hidden_params(
_hidden_params_dict = hidden_params
additional_headers = _hidden_params_dict.get("additional_headers", {})
if additional_headers and "x-litellm-response-cost" in additional_headers:
response_cost = additional_headers["x-litellm-response-cost"]
if (
additional_headers
and "llm_provider-x-litellm-response-cost" in additional_headers
):
response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
if response_cost is None:
return None
return float(additional_headers["x-litellm-response-cost"])
return float(additional_headers["llm_provider-x-litellm-response-cost"])
return None
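A minimal sketch of the renamed header lookup; the hidden_params dict below is an assumed shape for illustration, not taken from the diff:

    hidden_params = {
        "additional_headers": {"llm_provider-x-litellm-response-cost": "0.00042"}
    }
    additional_headers = hidden_params.get("additional_headers", {})
    response_cost = None
    if additional_headers and "llm_provider-x-litellm-response-cost" in additional_headers:
        raw_cost = additional_headers["llm_provider-x-litellm-response-cost"]
        response_cost = float(raw_cost) if raw_cost is not None else None
    assert response_cost == 0.00042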

View file

@ -1,5 +1,5 @@
import json
from typing import List, Literal, Union
from typing import Dict, List, Literal, Union
from mcp import ClientSession
from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
@ -76,8 +76,8 @@ def _get_function_arguments(function: FunctionDefinition) -> dict:
return arguments if isinstance(arguments, dict) else {}
def _transform_openai_tool_call_to_mcp_tool_call_request(
openai_tool: ChatCompletionMessageToolCall,
def transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool: Union[ChatCompletionMessageToolCall, Dict],
) -> MCPCallToolRequestParams:
"""Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams."""
function = openai_tool["function"]
@ -100,9 +100,11 @@ async def call_openai_tool(
Returns:
The result of the MCP tool call.
"""
mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request(
mcp_tool_call_request_params = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=openai_tool,
)
)
return await call_mcp_tool(
session=session,
call_tool_request_params=mcp_tool_call_request_params,
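A hedged usage sketch of the renamed helper, now that it also accepts a plain dict; the tool name and arguments are hypothetical, and the import path is omitted because it is not shown in this hunk:

    openai_tool = {
        "function": {
            "name": "get_current_weather",          # hypothetical tool name
            "arguments": '{"location": "Paris"}',   # JSON string, as OpenAI tool calls return it
        }
    }
    mcp_params = transform_openai_tool_call_request_to_mcp_tool_call_request(
        openai_tool=openai_tool
    )
    # expected: mcp_params.name == "get_current_weather"
    #           mcp_params.arguments == {"location": "Paris"}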

View file

@ -138,7 +138,6 @@ def create_fine_tuning_job(
# OpenAI
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
@ -360,7 +359,6 @@ def cancel_fine_tuning_job(
# OpenAI
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
@ -522,7 +520,6 @@ def list_fine_tuning_jobs(
# OpenAI
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base

View file

@ -19,7 +19,6 @@ else:
def squash_payloads(queue):
squashed = {}
if len(queue) == 0:
return squashed

View file

@ -195,13 +195,16 @@ class SlackAlerting(CustomBatchLogger):
if self.alerting is None or self.alert_types is None:
return
time_difference_float, model, api_base, messages = (
self._response_taking_too_long_callback_helper(
(
time_difference_float,
model,
api_base,
messages,
) = self._response_taking_too_long_callback_helper(
kwargs=kwargs,
start_time=start_time,
end_time=end_time,
)
)
if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions:
messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
@ -819,9 +822,9 @@ class SlackAlerting(CustomBatchLogger):
### UNIQUE CACHE KEY ###
cache_key = provider + region_name
outage_value: Optional[ProviderRegionOutageModel] = (
await self.internal_usage_cache.async_get_cache(key=cache_key)
)
outage_value: Optional[
ProviderRegionOutageModel
] = await self.internal_usage_cache.async_get_cache(key=cache_key)
if (
getattr(exception, "status_code", None) is None
@ -1402,9 +1405,9 @@ Model Info:
self.alert_to_webhook_url is not None
and alert_type in self.alert_to_webhook_url
):
slack_webhook_url: Optional[Union[str, List[str]]] = (
self.alert_to_webhook_url[alert_type]
)
slack_webhook_url: Optional[
Union[str, List[str]]
] = self.alert_to_webhook_url[alert_type]
elif self.default_webhook_url is not None:
slack_webhook_url = self.default_webhook_url
else:
@ -1768,7 +1771,6 @@ Model Info:
- Team Created, Updated, Deleted
"""
try:
message = f"`{event_name}`\n"
key_event_dict = key_event.model_dump()

View file

@ -98,7 +98,6 @@ class ArgillaLogger(CustomBatchLogger):
argilla_dataset_name: Optional[str],
argilla_base_url: Optional[str],
) -> ArgillaCredentialsObject:
_credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY")
if _credentials_api_key is None:
raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.")

View file

@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Optional, Union
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
@ -7,7 +7,7 @@ from litellm.types.utils import StandardLoggingPayload
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -19,14 +19,13 @@ if TYPE_CHECKING:
from litellm.types.integrations.arize import Protocol as _Protocol
Protocol = _Protocol
Span = _Span
Span = Union[_Span, Any]
else:
Protocol = Any
Span = Any
class ArizeLogger(OpenTelemetry):
def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
ArizeLogger.set_arize_attributes(span, kwargs, response_obj)
return

View file

@ -1,17 +1,20 @@
import os
from typing import TYPE_CHECKING, Any
from litellm.integrations.arize import _utils
from typing import TYPE_CHECKING, Any, Union
from litellm._logging import verbose_logger
from litellm.integrations.arize import _utils
from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig
if TYPE_CHECKING:
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
from litellm.types.integrations.arize import Protocol as _Protocol
from opentelemetry.trace import Span as _Span
from litellm.types.integrations.arize import Protocol as _Protocol
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
Protocol = _Protocol
OpenTelemetryConfig = _OpenTelemetryConfig
Span = _Span
Span = Union[_Span, Any]
else:
Protocol = Any
OpenTelemetryConfig = Any
@ -20,6 +23,7 @@ else:
ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"
class ArizePhoenixLogger:
@staticmethod
def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
@ -59,15 +63,14 @@ class ArizePhoenixLogger:
# a slightly different auth header format than self hosted phoenix
if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
if api_key is None:
raise ValueError("PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used.")
raise ValueError(
"PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
)
otlp_auth_headers = f"api_key={api_key}"
elif api_key is not None:
# api_key/auth is optional for self hosted phoenix
otlp_auth_headers = f"Authorization=Bearer {api_key}"
return ArizePhoenixConfig(
otlp_auth_headers=otlp_auth_headers,
protocol=protocol,
endpoint=endpoint
otlp_auth_headers=otlp_auth_headers, protocol=protocol, endpoint=endpoint
)

View file

@ -12,7 +12,10 @@ class AthinaLogger:
"athina-api-key": self.athina_api_key,
"Content-Type": "application/json",
}
self.athina_logging_url = os.getenv("ATHINA_BASE_URL", "https://log.athina.ai") + "/api/v1/log/inference"
self.athina_logging_url = (
os.getenv("ATHINA_BASE_URL", "https://log.athina.ai")
+ "/api/v1/log/inference"
)
self.additional_keys = [
"environment",
"prompt_slug",

View file

@ -50,12 +50,12 @@ class AzureBlobStorageLogger(CustomBatchLogger):
self.azure_storage_file_system: str = _azure_storage_file_system
# Internal variables used for Token based authentication
self.azure_auth_token: Optional[str] = (
None # the Azure AD token to use for Azure Storage API requests
)
self.token_expiry: Optional[datetime] = (
None # the expiry time of the current Azure AD token
)
self.azure_auth_token: Optional[
str
] = None # the Azure AD token to use for Azure Storage API requests
self.token_expiry: Optional[
datetime
] = None # the expiry time of the current Azure AD token
asyncio.create_task(self.periodic_flush())
self.flush_lock = asyncio.Lock()
@ -153,7 +153,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
3. Flush the data
"""
try:
if self.azure_storage_account_key:
await self.upload_to_azure_data_lake_with_azure_account_key(
payload=payload

View file

@ -4,7 +4,7 @@
import copy
import os
from datetime import datetime
from typing import Optional, Dict
from typing import Dict, Optional
import httpx
from pydantic import BaseModel
@ -19,7 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback)
global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback
)
global_braintrust_sync_http_handler = HTTPHandler()
API_BASE = "https://api.braintrustdata.com/v1"
@ -35,7 +37,9 @@ def get_utc_datetime():
class BraintrustLogger(CustomLogger):
def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None) -> None:
def __init__(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> None:
super().__init__()
self.validate_environment(api_key=api_key)
self.api_base = api_base or API_BASE
@ -45,7 +49,9 @@ class BraintrustLogger(CustomLogger):
"Authorization": "Bearer " + self.api_key,
"Content-Type": "application/json",
}
self._project_id_cache: Dict[str, str] = {} # Cache mapping project names to IDs
self._project_id_cache: Dict[
str, str
] = {} # Cache mapping project names to IDs
def validate_environment(self, api_key: Optional[str]):
"""
@ -71,7 +77,9 @@ class BraintrustLogger(CustomLogger):
try:
response = global_braintrust_sync_http_handler.post(
f"{self.api_base}/project", headers=self.headers, json={"name": project_name}
f"{self.api_base}/project",
headers=self.headers,
json={"name": project_name},
)
project_dict = response.json()
project_id = project_dict["id"]
@ -89,7 +97,9 @@ class BraintrustLogger(CustomLogger):
try:
response = await global_braintrust_http_handler.post(
f"{self.api_base}/project/register", headers=self.headers, json={"name": project_name}
f"{self.api_base}/project/register",
headers=self.headers,
json={"name": project_name},
)
project_dict = response.json()
project_id = project_dict["id"]
@ -116,15 +126,21 @@ class BraintrustLogger(CustomLogger):
if metadata is None:
metadata = {}
proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("braintrust"):
trace_param_key = metadata_param_key.replace("braintrust", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(f"Overwriting Braintrust `{trace_param_key}` from request header")
verbose_logger.warning(
f"Overwriting Braintrust `{trace_param_key}` from request header"
)
else:
verbose_logger.debug(f"Found Braintrust `{trace_param_key}` in request header")
verbose_logger.debug(
f"Found Braintrust `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
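The header-to-metadata mapping above can be mirrored in a small standalone sketch (the header names are made up for the example):

    proxy_headers = {"braintrust_span_attr": "checkout-flow", "x-api-key": "ignored"}
    metadata: dict = {}
    for header_key, header_value in proxy_headers.items():
        if header_key.startswith("braintrust"):
            trace_param_key = header_key.replace("braintrust", "", 1)
            metadata[trace_param_key] = header_value
    assert metadata == {"_span_attr": "checkout-flow"}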
@ -157,24 +173,35 @@ class BraintrustLogger(CustomLogger):
output = None
choices = []
if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
):
output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text
choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {}
try:
metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata
metadata = copy.deepcopy(
metadata
) # Avoid modifying the original metadata
except Exception:
new_metadata = {}
for key, value in metadata.items():
@ -192,7 +219,9 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id")
if project_id is None:
project_name = metadata.get("project_name")
project_id = self.get_project_id_sync(project_name) if project_name else None
project_id = (
self.get_project_id_sync(project_name) if project_name else None
)
if project_id is None:
if self.default_project_id is None:
@ -234,7 +263,8 @@ class BraintrustLogger(CustomLogger):
"completion_tokens": usage_obj.completion_tokens,
"total_tokens": usage_obj.total_tokens,
"total_cost": cost,
"time_to_first_token": end_time.timestamp() - start_time.timestamp(),
"time_to_first_token": end_time.timestamp()
- start_time.timestamp(),
"start": start_time.timestamp(),
"end": end_time.timestamp(),
}
@ -255,7 +285,9 @@ class BraintrustLogger(CustomLogger):
request_data["metrics"] = metrics
try:
print_verbose(f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}")
print_verbose(
f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
)
global_braintrust_sync_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]},
@ -276,20 +308,29 @@ class BraintrustLogger(CustomLogger):
output = None
choices = []
if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
):
output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text
choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {}
new_metadata = {}
@ -313,7 +354,11 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id")
if project_id is None:
project_name = metadata.get("project_name")
project_id = await self.get_project_id_async(project_name) if project_name else None
project_id = (
await self.get_project_id_async(project_name)
if project_name
else None
)
if project_id is None:
if self.default_project_id is None:
@ -362,8 +407,14 @@ class BraintrustLogger(CustomLogger):
api_call_start_time = kwargs.get("api_call_start_time")
completion_start_time = kwargs.get("completion_start_time")
if api_call_start_time is not None and completion_start_time is not None:
metrics["time_to_first_token"] = completion_start_time.timestamp() - api_call_start_time.timestamp()
if (
api_call_start_time is not None
and completion_start_time is not None
):
metrics["time_to_first_token"] = (
completion_start_time.timestamp()
- api_call_start_time.timestamp()
)
request_data = {
"id": litellm_call_id,

View file

@ -14,7 +14,6 @@ from litellm.integrations.custom_logger import CustomLogger
class CustomBatchLogger(CustomLogger):
def __init__(
self,
flush_lock: Optional[asyncio.Lock] = None,

View file

@ -7,7 +7,6 @@ from litellm.types.utils import StandardLoggingGuardrailInformation
class CustomGuardrail(CustomLogger):
def __init__(
self,
guardrail_name: Optional[str] = None,

View file

@ -31,7 +31,7 @@ from litellm.types.utils import (
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -233,7 +233,6 @@ class DataDogLogger(
pass
async def _log_async_event(self, kwargs, response_obj, start_time, end_time):
dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs,
response_obj=response_obj,

View file

@ -125,9 +125,9 @@ class GCSBucketBase(CustomBatchLogger):
if kwargs is None:
kwargs = {}
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params", None)
)
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params", None)
bucket_name: str
path_service_account: Optional[str]

View file

@ -70,13 +70,14 @@ class GcsPubSubLogger(CustomBatchLogger):
"""Construct authorization headers using Vertex AI auth"""
from litellm import vertex_chat_completion
_auth_header, vertex_project = (
await vertex_chat_completion._ensure_access_token_async(
(
_auth_header,
vertex_project,
) = await vertex_chat_completion._ensure_access_token_async(
credentials=self.path_service_account_json,
project_id=None,
custom_llm_provider="vertex_ai",
)
)
auth_header, _ = vertex_chat_completion._get_token_and_url(
model="pub-sub",

View file

@ -155,11 +155,7 @@ class HumanloopLogger(CustomLogger):
prompt_id: str,
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
) -> Tuple[str, List[AllMessageValues], dict,]:
humanloop_api_key = dynamic_callback_params.get(
"humanloop_api_key"
) or get_secret_str("HUMANLOOP_API_KEY")

View file

@ -471,9 +471,9 @@ class LangFuseLogger:
# we clean out all extra litellm metadata params before logging
clean_metadata: Dict[str, Any] = {}
if prompt_management_metadata is not None:
clean_metadata["prompt_management_metadata"] = (
prompt_management_metadata
)
clean_metadata[
"prompt_management_metadata"
] = prompt_management_metadata
if isinstance(metadata, dict):
for key, value in metadata.items():
# generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy

View file

@ -19,7 +19,6 @@ else:
class LangFuseHandler:
@staticmethod
def get_langfuse_logger_for_request(
standard_callback_dynamic_params: StandardCallbackDynamicParams,
@ -87,7 +86,9 @@ class LangFuseHandler:
if globalLangfuseLogger is not None:
return globalLangfuseLogger
credentials_dict: Dict[str, Any] = (
credentials_dict: Dict[
str, Any
] = (
{}
) # the global langfuse logger uses Environment Variables, there are no dynamic credentials
globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache(

View file

@ -172,11 +172,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
prompt_id: str,
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
) -> Tuple[str, List[AllMessageValues], dict,]:
return self.get_chat_completion_prompt(
model,
messages,

View file

@ -75,7 +75,6 @@ class LangsmithLogger(CustomBatchLogger):
langsmith_project: Optional[str] = None,
langsmith_base_url: Optional[str] = None,
) -> LangsmithCredentialsObject:
_credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
if _credentials_api_key is None:
raise Exception(
@ -443,9 +442,9 @@ class LangsmithLogger(CustomBatchLogger):
Otherwise, use the default credentials.
"""
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params", None)
)
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params", None)
if standard_callback_dynamic_params is not None:
credentials = self.get_credentials_from_env(
langsmith_api_key=standard_callback_dynamic_params.get(
@ -481,7 +480,6 @@ class LangsmithLogger(CustomBatchLogger):
asyncio.run(self.async_send_batch())
def get_run_by_id(self, run_id):
langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"]
langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"]

View file

@ -1,12 +1,12 @@
import json
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Union
from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -20,7 +20,6 @@ def parse_tool_calls(tool_calls):
return None
def clean_tool_call(tool_call):
serialized = {
"type": tool_call.type,
"id": tool_call.id,
@ -36,7 +35,6 @@ def parse_tool_calls(tool_calls):
def parse_messages(input):
if input is None:
return None

View file

@ -48,14 +48,17 @@ class MlflowLogger(CustomLogger):
def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
try:
from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
from mlflow.tracing.utils import set_span_chat_messages # type: ignore
from mlflow.tracing.utils import set_span_chat_tools # type: ignore
except ImportError:
return
inputs = self._construct_input(kwargs)
input_messages = inputs.get("messages", [])
output_messages = [c.message.model_dump(exclude_none=True)
for c in getattr(response_obj, "choices", [])]
output_messages = [
c.message.model_dump(exclude_none=True)
for c in getattr(response_obj, "choices", [])
]
if messages := [*input_messages, *output_messages]:
set_span_chat_messages(span, messages)
if tools := inputs.get("tools"):

View file

@ -1,7 +1,7 @@
import os
from dataclasses import dataclass
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
import litellm
from litellm._logging import verbose_logger
@ -23,10 +23,10 @@ if TYPE_CHECKING:
)
from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
Span = _Span
SpanExporter = _SpanExporter
UserAPIKeyAuth = _UserAPIKeyAuth
ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
Span = Union[_Span, Any]
SpanExporter = Union[_SpanExporter, Any]
UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any]
ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any]
else:
Span = Any
SpanExporter = Any
@ -46,7 +46,6 @@ LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@dataclass
class OpenTelemetryConfig:
exporter: Union[str, SpanExporter] = "console"
endpoint: Optional[str] = None
headers: Optional[str] = None
@ -154,7 +153,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None,
):
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@ -215,7 +213,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None,
):
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@ -353,9 +350,9 @@ class OpenTelemetry(CustomLogger):
"""
from opentelemetry import trace
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params")
)
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params")
if not standard_callback_dynamic_params:
return
@ -722,7 +719,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
@ -843,12 +839,14 @@ class OpenTelemetry(CustomLogger):
headers=dynamic_headers or self.OTEL_HEADERS
)
if isinstance(self.OTEL_EXPORTER, SpanExporter):
if hasattr(
self.OTEL_EXPORTER, "export"
): # Check if it has the export method that SpanExporter requires
verbose_logger.debug(
"OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s",
self.OTEL_EXPORTER,
)
return SimpleSpanProcessor(self.OTEL_EXPORTER)
return SimpleSpanProcessor(cast(SpanExporter, self.OTEL_EXPORTER))
if self.OTEL_EXPORTER == "console":
verbose_logger.debug(
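The duck-typed hasattr check replaces a strict isinstance() test, so any object exposing an export() method is accepted. A minimal illustration with a hypothetical exporter class (not a real OpenTelemetry type):

    class MyCustomExporter:
        def export(self, spans):          # the only capability the check above looks for
            return None

    exporter = MyCustomExporter()
    assert hasattr(exporter, "export")    # accepted by the new check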
@ -907,7 +905,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@ -961,7 +958,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

View file

@ -185,7 +185,6 @@ class OpikLogger(CustomBatchLogger):
def _create_opik_payload( # noqa: PLR0915
self, kwargs, response_obj, start_time, end_time
) -> List[Dict]:
# Get metadata
_litellm_params = kwargs.get("litellm_params", {}) or {}
litellm_params_metadata = _litellm_params.get("metadata", {}) or {}

View file

@ -988,9 +988,9 @@ class PrometheusLogger(CustomLogger):
):
try:
verbose_logger.debug("setting remaining tokens requests metric")
standard_logging_payload: Optional[StandardLoggingPayload] = (
request_kwargs.get("standard_logging_object")
)
standard_logging_payload: Optional[
StandardLoggingPayload
] = request_kwargs.get("standard_logging_object")
if standard_logging_payload is None:
return

View file

@ -14,7 +14,6 @@ class PromptManagementClient(TypedDict):
class PromptManagementBase(ABC):
@property
@abstractmethod
def integration_name(self) -> str:
@ -83,11 +82,7 @@ class PromptManagementBase(ABC):
prompt_id: str,
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
) -> Tuple[str, List[AllMessageValues], dict,]:
if not self.should_run_prompt_management(
prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
):

View file

@ -38,7 +38,7 @@ class S3Logger:
if litellm.s3_callback_params is not None:
# read in .env variables - example os.environ/AWS_BUCKET_NAME
for key, value in litellm.s3_callback_params.items():
if type(value) is str and value.startswith("os.environ/"):
if isinstance(value, str) and value.startswith("os.environ/"):
litellm.s3_callback_params[key] = litellm.get_secret(value)
# now set s3 params from litellm.s3_logger_params
s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")

View file

@ -21,11 +21,11 @@ try:
# contains a (known) object attribute
object: Literal["chat.completion", "edit", "text_completion"]
def __getitem__(self, key: K) -> V: ... # noqa
def __getitem__(self, key: K) -> V:
... # noqa
def get( # noqa
self, key: K, default: Optional[V] = None
) -> Optional[V]: ... # pragma: no cover
def get(self, key: K, default: Optional[V] = None) -> Optional[V]: # noqa
... # pragma: no cover
class OpenAIRequestResponseResolver:
def __call__(

View file

@ -10,7 +10,7 @@ from litellm.types.llms.openai import AllMessageValues
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
Span = _Span
Span = Union[_Span, Any]
else:
Span = Any

View file

@ -11,7 +11,9 @@ except (ImportError, AttributeError):
# Old way to access resources, which setuptools deprecated some time ago
import pkg_resources # type: ignore
filename = pkg_resources.resource_filename(__name__, "litellm_core_utils/tokenizers")
filename = pkg_resources.resource_filename(
__name__, "litellm_core_utils/tokenizers"
)
os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
"CUSTOM_TIKTOKEN_CACHE_DIR", filename

View file

@ -79,6 +79,22 @@ def get_supported_openai_params( # noqa: PLR0915
elif custom_llm_provider == "maritalk":
return litellm.MaritalkConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "openai":
if request_type == "transcription":
transcription_provider_config = (
litellm.ProviderConfigManager.get_provider_audio_transcription_config(
model=model, provider=LlmProviders.OPENAI
)
)
if isinstance(
transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
):
return transcription_provider_config.get_supported_openai_params(
model=model
)
else:
raise ValueError(
f"Unsupported provider config: {transcription_provider_config} for model: {model}"
)
return litellm.OpenAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "azure":
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
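A hedged usage sketch of the new transcription routing; the model name is illustrative, and the call assumes litellm.get_supported_openai_params keeps its existing model / custom_llm_provider / request_type signature:

    import litellm

    transcription_params = litellm.get_supported_openai_params(
        model="whisper-1",                 # illustrative model name
        custom_llm_provider="openai",
        request_type="transcription",      # routed through the transcription config branch above
    )
    print(transcription_params)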

View file

@ -67,6 +67,7 @@ from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingAdditionalHeaders,
StandardLoggingHiddenParams,
StandardLoggingMCPToolCall,
StandardLoggingMetadata,
StandardLoggingModelCostFailureDebugInformation,
StandardLoggingModelInformation,
@ -239,9 +240,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.litellm_trace_id = litellm_trace_id
self.function_id = function_id
self.streaming_chunks: List[Any] = [] # for generating complete stream response
self.sync_streaming_chunks: List[Any] = (
[]
) # for generating complete stream response
self.sync_streaming_chunks: List[
Any
] = [] # for generating complete stream response
self.log_raw_request_response = log_raw_request_response
# Initialize dynamic callbacks
@ -452,11 +453,13 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_id: str,
prompt_variables: Optional[dict],
) -> Tuple[str, List[AllMessageValues], dict]:
custom_logger = self.get_custom_logger_for_prompt_management(model)
if custom_logger:
model, messages, non_default_params = (
custom_logger.get_chat_completion_prompt(
(
model,
messages,
non_default_params,
) = custom_logger.get_chat_completion_prompt(
model=model,
messages=messages,
non_default_params=non_default_params,
@ -464,7 +467,6 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_variables=prompt_variables,
dynamic_callback_params=self.standard_callback_dynamic_params,
)
)
self.messages = messages
return model, messages, non_default_params
@ -541,12 +543,11 @@ class Logging(LiteLLMLoggingBaseClass):
model
): # if model name was changes pre-call, overwrite the initial model call name with the new one
self.model_call_details["model"] = model
self.model_call_details["litellm_params"]["api_base"] = (
self._get_masked_api_base(additional_args.get("api_base", ""))
)
self.model_call_details["litellm_params"][
"api_base"
] = self._get_masked_api_base(additional_args.get("api_base", ""))
def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
# Log the exact input to the LLM API
litellm.error_logs["PRE_CALL"] = locals()
try:
@ -568,19 +569,16 @@ class Logging(LiteLLMLoggingBaseClass):
self.log_raw_request_response is True
or log_raw_request_response is True
):
_litellm_params = self.model_call_details.get("litellm_params", {})
_metadata = _litellm_params.get("metadata", {}) or {}
try:
# [Non-blocking Extra Debug Information in metadata]
if turn_off_message_logging is True:
_metadata["raw_request"] = (
"redacted by litellm. \
_metadata[
"raw_request"
] = "redacted by litellm. \
'litellm.turn_off_message_logging=True'"
)
else:
curl_command = self._get_request_curl_command(
api_base=additional_args.get("api_base", ""),
headers=additional_args.get("headers", {}),
@ -590,8 +588,9 @@ class Logging(LiteLLMLoggingBaseClass):
_metadata["raw_request"] = str(curl_command)
# split up, so it's easier to parse in the UI
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
raw_request_api_base=str(
additional_args.get("api_base") or ""
),
@ -604,20 +603,19 @@ class Logging(LiteLLMLoggingBaseClass):
),
error=None,
)
)
except Exception as e:
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
error=str(e),
)
)
traceback.print_exc()
_metadata["raw_request"] = (
"Unable to Log \
_metadata[
"raw_request"
] = "Unable to Log \
raw request: {}".format(
str(e)
)
)
if self.logger_fn and callable(self.logger_fn):
try:
self.logger_fn(
@ -941,9 +939,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
return None
try:
@ -968,9 +966,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
return None
@ -995,7 +993,6 @@ class Logging(LiteLLMLoggingBaseClass):
def should_run_callback(
self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
) -> bool:
if litellm.global_disable_no_log_param:
return True
@ -1027,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now()
if self.completion_start_time is None:
self.completion_start_time = end_time
self.model_call_details["completion_start_time"] = (
self.completion_start_time
)
self.model_call_details[
"completion_start_time"
] = self.completion_start_time
self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit
@ -1083,13 +1080,14 @@ class Logging(LiteLLMLoggingBaseClass):
"response_cost"
]
else:
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=result)
)
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=result)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
@ -1098,11 +1096,11 @@ class Logging(LiteLLMLoggingBaseClass):
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif isinstance(result, dict): # pass-through endpoints
elif isinstance(result, dict) or isinstance(result, list):
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
@ -1111,11 +1109,10 @@ class Logging(LiteLLMLoggingBaseClass):
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif standard_logging_object is not None:
self.model_call_details["standard_logging_object"] = (
standard_logging_object
)
self.model_call_details[
"standard_logging_object"
] = standard_logging_object
else: # streaming chunks + image gen.
self.model_call_details["response_cost"] = None
@ -1154,7 +1151,6 @@ class Logging(LiteLLMLoggingBaseClass):
standard_logging_object=kwargs.get("standard_logging_object", None),
)
try:
## BUILD COMPLETE STREAMED RESPONSE
complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
@ -1172,15 +1168,16 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
"Logging Details LiteLLM-Success Call streaming complete"
)
self.model_call_details["complete_streaming_response"] = (
complete_streaming_response
)
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=complete_streaming_response)
)
self.model_call_details[
"complete_streaming_response"
] = complete_streaming_response
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=complete_streaming_response)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
@ -1189,7 +1186,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks,
global_callbacks=litellm.success_callback,
@ -1207,7 +1203,6 @@ class Logging(LiteLLMLoggingBaseClass):
## LOGGING HOOK ##
for callback in callbacks:
if isinstance(callback, CustomLogger):
self.model_call_details, result = callback.logging_hook(
kwargs=self.model_call_details,
result=result,
@ -1538,11 +1533,11 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = (
self.model_call_details.get(
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]
openMeterLogger.log_success_event(
kwargs=self.model_call_details,
@ -1581,11 +1576,11 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = (
self.model_call_details.get(
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]
callback.log_success_event(
@ -1659,7 +1654,6 @@ class Logging(LiteLLMLoggingBaseClass):
if self.call_type == CallTypes.aretrieve_batch.value and isinstance(
result, LiteLLMBatch
):
response_cost, batch_usage, batch_models = await _handle_completed_batch(
batch=result, custom_llm_provider=self.custom_llm_provider
)
@ -1692,9 +1686,9 @@ class Logging(LiteLLMLoggingBaseClass):
if complete_streaming_response is not None:
print_verbose("Async success callbacks: Got a complete streaming response")
self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response
)
self.model_call_details[
"async_complete_streaming_response"
] = complete_streaming_response
try:
if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0
@ -1704,11 +1698,11 @@ class Logging(LiteLLMLoggingBaseClass):
model_call_details=self.model_call_details
)
# base_model defaults to None if not set on model_info
self.model_call_details["response_cost"] = (
self._response_cost_calculator(
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(
result=complete_streaming_response
)
)
verbose_logger.debug(
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
@ -1720,8 +1714,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["response_cost"] = None
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
@ -1730,7 +1725,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks,
global_callbacks=litellm._async_success_callback,
@ -1935,8 +1929,9 @@ class Logging(LiteLLMLoggingBaseClass):
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
@ -1947,7 +1942,6 @@ class Logging(LiteLLMLoggingBaseClass):
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
return start_time, end_time
async def special_failure_handlers(self, exception: Exception):
@ -2084,7 +2078,6 @@ class Logging(LiteLLMLoggingBaseClass):
)
is not True
): # custom logger class
callback.log_failure_event(
start_time=start_time,
end_time=end_time,
@ -2713,9 +2706,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
endpoint=arize_config.endpoint,
)
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
for callback in _in_memory_loggers:
if (
isinstance(callback, ArizeLogger)
@ -2739,9 +2732,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
# auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None:
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
arize_phoenix_config.otlp_auth_headers
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = arize_phoenix_config.otlp_auth_headers
for callback in _in_memory_loggers:
if (
@ -2832,9 +2825,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
exporter="otlp_http",
endpoint="https://langtrace.ai/api/trace",
)
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"api_key={os.getenv('LANGTRACE_API_KEY')}"
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
for callback in _in_memory_loggers:
if (
isinstance(callback, OpenTelemetry)
@ -3114,6 +3107,7 @@ class StandardLoggingPayloadSetup:
litellm_params: Optional[dict] = None,
prompt_integration: Optional[str] = None,
applied_guardrails: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
) -> StandardLoggingMetadata:
"""
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@ -3160,6 +3154,7 @@ class StandardLoggingPayloadSetup:
user_api_key_end_user_id=None,
prompt_management_metadata=prompt_management_metadata,
applied_guardrails=applied_guardrails,
mcp_tool_call_metadata=mcp_tool_call_metadata,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@ -3223,7 +3218,6 @@ class StandardLoggingPayloadSetup:
custom_llm_provider: Optional[str],
init_response_obj: Union[Any, BaseModel, dict],
) -> StandardLoggingModelInformation:
model_cost_name = _select_model_name_for_cost_calc(
model=None,
completion_response=init_response_obj, # type: ignore
@ -3286,7 +3280,6 @@ class StandardLoggingPayloadSetup:
def get_additional_headers(
additiona_headers: Optional[dict],
) -> Optional[StandardLoggingAdditionalHeaders]:
if additiona_headers is None:
return None
@ -3322,11 +3315,11 @@ class StandardLoggingPayloadSetup:
for key in StandardLoggingHiddenParams.__annotations__.keys():
if key in hidden_params:
if key == "additional_headers":
clean_hidden_params["additional_headers"] = (
StandardLoggingPayloadSetup.get_additional_headers(
clean_hidden_params[
"additional_headers"
] = StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key]
)
)
else:
clean_hidden_params[key] = hidden_params[key] # type: ignore
return clean_hidden_params
@ -3463,13 +3456,15 @@ def get_standard_logging_object_payload(
)
# cleanup timestamps
start_time_float, end_time_float, completion_start_time_float = (
StandardLoggingPayloadSetup.cleanup_timestamps(
(
start_time_float,
end_time_float,
completion_start_time_float,
) = StandardLoggingPayloadSetup.cleanup_timestamps(
start_time=start_time,
end_time=end_time,
completion_start_time=completion_start_time,
)
)
response_time = StandardLoggingPayloadSetup.get_response_time(
start_time_float=start_time_float,
end_time_float=end_time_float,
@ -3486,6 +3481,7 @@ def get_standard_logging_object_payload(
litellm_params=litellm_params,
prompt_integration=kwargs.get("prompt_integration", None),
applied_guardrails=kwargs.get("applied_guardrails", None),
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
)
_request_body = proxy_server_request.get("body", {})
@ -3495,7 +3491,6 @@ def get_standard_logging_object_payload(
saved_cache_cost: float = 0.0
if cache_hit is True:
id = f"{id}_cache_hit{time.time()}" # do not duplicate the request id
saved_cache_cost = (
logging_obj._response_cost_calculator(
@ -3626,6 +3621,7 @@ def get_standard_logging_metadata(
user_api_key_end_user_id=None,
prompt_management_metadata=None,
applied_guardrails=None,
mcp_tool_call_metadata=None,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@ -3658,9 +3654,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
):
for k, v in metadata["user_api_key_metadata"].items():
if k == "logging": # prevent logging user logging keys
cleaned_user_api_key_metadata[k] = (
"scrubbed_by_litellm_for_sensitive_keys"
)
cleaned_user_api_key_metadata[
k
] = "scrubbed_by_litellm_for_sensitive_keys"
else:
cleaned_user_api_key_metadata[k] = v

View file

@ -1,7 +1,7 @@
# What is this?
## Helper utilities for cost_per_token()
from typing import Optional, Tuple
from typing import Optional, Tuple, cast
import litellm
from litellm import verbose_logger
@ -121,6 +121,31 @@ def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> floa
return model_info["output_cost_per_token"]
def calculate_cost_component(
model_info: ModelInfo, cost_key: str, usage_value: Optional[float]
) -> float:
"""
Generic cost calculator for any usage component
Args:
model_info: Dictionary containing cost information
cost_key: The key for the cost multiplier in model_info (e.g., 'input_cost_per_audio_token')
usage_value: The actual usage value (e.g., number of tokens, characters, seconds)
Returns:
float: The calculated cost
"""
cost_per_unit = model_info.get(cost_key)
if (
cost_per_unit is not None
and isinstance(cost_per_unit, float)
and usage_value is not None
and usage_value > 0
):
return float(usage_value) * cost_per_unit
return 0.0
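A quick worked example of the generic helper, using invented cost numbers rather than any real model's pricing:

    example_model_info = {
        "input_cost_per_audio_token": 0.000002,   # $ per audio token (illustrative)
        "cache_read_input_token_cost": None,      # missing/None components contribute 0.0
    }
    audio_cost = calculate_cost_component(
        model_info=example_model_info,
        cost_key="input_cost_per_audio_token",
        usage_value=500,
    )
    assert audio_cost == 500 * 0.000002           # ~0.001
    assert calculate_cost_component(example_model_info, "cache_read_input_token_cost", 500) == 0.0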
def generic_cost_per_token(
model: str, usage: Usage, custom_llm_provider: str
) -> Tuple[float, float]:
@ -136,6 +161,7 @@ def generic_cost_per_token(
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
## GET MODEL INFO
model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider)
@ -143,38 +169,124 @@ def generic_cost_per_token(
### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
prompt_cost = 0.0
### PROCESSING COST
non_cache_hit_tokens = usage.prompt_tokens
text_tokens = usage.prompt_tokens
cache_hit_tokens = 0
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
cache_hit_tokens = usage.prompt_tokens_details.cached_tokens
non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens
audio_tokens = 0
character_count = 0
image_count = 0
video_length_seconds = 0
if usage.prompt_tokens_details:
cache_hit_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
)
or 0
)
text_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
)
or 0 # falls back to 0 here; recomputed from prompt_tokens in the edge case below if unset
)
audio_tokens = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
or 0
)
character_count = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "character_count", 0),
)
or 0
)
image_count = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "image_count", 0))
or 0
)
video_length_seconds = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "video_length_seconds", 0),
)
or 0
)
## EDGE CASE - text tokens not set inside PromptTokensDetails
if text_tokens == 0:
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost
prompt_cost = float(text_tokens) * prompt_base_cost
_cache_read_input_token_cost = model_info.get("cache_read_input_token_cost")
if (
_cache_read_input_token_cost is not None
and usage.prompt_tokens_details
and usage.prompt_tokens_details.cached_tokens
):
prompt_cost += (
float(usage.prompt_tokens_details.cached_tokens)
* _cache_read_input_token_cost
### CACHE READ COST
prompt_cost += calculate_cost_component(
model_info, "cache_read_input_token_cost", cache_hit_tokens
)
### AUDIO COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_audio_token", audio_tokens
)
### CACHE WRITING COST
_cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost")
if _cache_creation_input_token_cost is not None:
prompt_cost += (
float(usage._cache_creation_input_tokens) * _cache_creation_input_token_cost
prompt_cost += calculate_cost_component(
model_info,
"cache_creation_input_token_cost",
usage._cache_creation_input_tokens,
)
### CHARACTER COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_character", character_count
)
### IMAGE COUNT COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_image", image_count
)
### VIDEO LENGTH COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_video_per_second", video_length_seconds
)
## CALCULATE OUTPUT COST
completion_base_cost = _get_completion_token_base_cost(
model_info=model_info, usage=usage
)
completion_cost = usage["completion_tokens"] * completion_base_cost
text_tokens = usage.completion_tokens
audio_tokens = 0
if usage.completion_tokens_details is not None:
audio_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "audio_tokens", 0),
)
or 0
)
text_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "text_tokens", None),
)
or usage.completion_tokens # default to completion tokens, if this field is not set
)
## TEXT COST
completion_cost = float(text_tokens) * completion_base_cost
_output_cost_per_audio_token: Optional[float] = model_info.get(
"output_cost_per_audio_token"
)
## AUDIO COST
if (
_output_cost_per_audio_token is not None
and audio_tokens is not None
and audio_tokens > 0
):
completion_cost += float(audio_tokens) * _output_cost_per_audio_token
return prompt_cost, completion_cost
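To make the prompt-side arithmetic concrete, a short sketch with invented prices and token counts (this is not any real model's pricing):

    # Worked example mirroring the prompt-side arithmetic above:
    input_cost_per_token = 0.00001
    cache_read_input_token_cost = 0.0000025
    input_cost_per_audio_token = 0.00002

    prompt_tokens, cache_hit_tokens, audio_tokens = 1000, 200, 100
    text_tokens = prompt_tokens - cache_hit_tokens - audio_tokens   # 700, the edge-case fallback
    prompt_cost = (
        text_tokens * input_cost_per_token
        + cache_hit_tokens * cache_read_input_token_cost
        + audio_tokens * input_cost_per_audio_token
    )
    print(round(prompt_cost, 8))   # 0.0095 with these invented numbers; character/image/video add 0.0 when unset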

Some files were not shown because too many files have changed in this diff.