Merge branch 'main' into litellm_sagemaker_fix_stream

This commit is contained in:
Ishaan Jaff 2025-03-31 14:22:20 -07:00
commit 83ba96b8c6
452 changed files with 13927 additions and 3613 deletions

View file

@ -3,6 +3,18 @@ orbs:
codecov: codecov/codecov@4.0.1 codecov: codecov/codecov@4.0.1
node: circleci/node@5.1.0 # Add this line to declare the node orb node: circleci/node@5.1.0 # Add this line to declare the node orb
commands:
setup_google_dns:
steps:
- run:
name: "Configure Google DNS"
command: |
# Backup original resolv.conf
sudo cp /etc/resolv.conf /etc/resolv.conf.backup
# Add both local and Google DNS servers
echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
jobs: jobs:
local_testing: local_testing:
@ -15,7 +27,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -134,7 +146,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -234,7 +246,13 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run:
name: DNS lookup for Redis host
command: |
sudo apt-get update
sudo apt-get install -y dnsutils
dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -334,6 +352,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -388,6 +407,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -429,6 +449,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -479,7 +500,13 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- run:
name: Install PostgreSQL
command: |
sudo apt-get update
sudo apt-get install postgresql postgresql-contrib
echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -534,6 +561,7 @@ jobs:
pip install "diskcache==5.6.1" pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0" pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0" pip install "jsonschema==4.22.0"
pip install "pytest-postgresql==7.0.1"
- save_cache: - save_cache:
paths: paths:
- ./venv - ./venv
@ -569,7 +597,7 @@ jobs:
- litellm_proxy_unit_tests_coverage - litellm_proxy_unit_tests_coverage
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
docker: docker:
- image: cimg/python:3.11 - image: cimg/python:3.13.1
auth: auth:
username: ${DOCKERHUB_USERNAME} username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD} password: ${DOCKERHUB_PASSWORD}
@ -577,6 +605,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -618,6 +647,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -654,6 +684,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -696,6 +727,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -740,6 +772,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -782,6 +815,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -828,6 +862,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -872,6 +907,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -918,6 +954,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -960,6 +997,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1002,6 +1040,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1048,6 +1087,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1080,6 +1120,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1104,6 +1145,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
# Install Helm # Install Helm
- run: - run:
name: Install Helm name: Install Helm
@ -1173,6 +1215,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1209,6 +1252,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Python 3.9 name: Install Python 3.9
command: | command: |
@ -1283,6 +1327,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1418,6 +1463,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1542,6 +1588,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1704,6 +1751,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1815,6 +1863,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1897,6 +1946,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
# Remove Docker CLI installation since it's already available in machine executor # Remove Docker CLI installation since it's already available in machine executor
- run: - run:
name: Install Python 3.13 name: Install Python 3.13
@ -1994,6 +2044,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -2039,6 +2090,8 @@ jobs:
pip install "google-cloud-aiplatform==1.59.0" pip install "google-cloud-aiplatform==1.59.0"
pip install "anthropic==0.49.0" pip install "anthropic==0.49.0"
pip install "langchain_mcp_adapters==0.0.5" pip install "langchain_mcp_adapters==0.0.5"
pip install "langchain_openai==0.2.1"
pip install "langgraph==0.3.18"
# Run pytest and generate JUnit XML report # Run pytest and generate JUnit XML report
- run: - run:
name: Build Docker image name: Build Docker image
@ -2251,6 +2304,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build UI name: Build UI
command: | command: |
@ -2365,6 +2419,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build Docker image name: Build Docker image
command: | command: |
@ -2387,6 +2442,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build Docker image name: Build Docker image
command: | command: |

206
.github/workflows/publish-migrations.yml vendored Normal file
View file

@ -0,0 +1,206 @@
name: Publish Prisma Migrations
permissions:
contents: write
pull-requests: write
on:
push:
paths:
- 'schema.prisma' # Check root schema.prisma
branches:
- main
jobs:
publish-migrations:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:14
env:
POSTGRES_DB: temp_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
# Add shadow database service
postgres_shadow:
image: postgres:14
env:
POSTGRES_DB: shadow_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5433:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install Dependencies
run: |
pip install prisma
pip install python-dotenv
- name: Generate Initial Migration if None Exists
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
mkdir -p deploy/migrations
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
echo "No existing migrations found, creating baseline..."
VERSION=$(date +%Y%m%d%H%M%S)
mkdir -p deploy/migrations/${VERSION}_initial
echo "Generating initial migration..."
# Save raw output for debugging
prisma migrate diff \
--from-empty \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Raw migration file content:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Cleaning migration file..."
# Clean the file
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
# Verify the migration file
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
echo "ERROR: Migration file is empty after cleaning"
echo "Original content was:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
exit 1
fi
echo "Final migration file content:"
cat deploy/migrations/${VERSION}_initial/migration.sql
# Verify it starts with SQL
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
echo "ERROR: Migration file does not start with SQL command or comment"
echo "First line is:"
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
echo "Full content is:"
cat deploy/migrations/${VERSION}_initial/migration.sql
exit 1
fi
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
fi
- name: Compare and Generate Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create temporary migration workspace
mkdir -p temp_migrations
# Copy existing migrations (will not fail if directory is empty)
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
VERSION=$(date +%Y%m%d%H%M%S)
# Generate diff against existing migrations or empty state
prisma migrate diff \
--from-migrations temp_migrations \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > temp_migrations/migration_${VERSION}.sql
# Check if there are actual changes
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
echo "Changes detected, creating new migration"
mkdir -p deploy/migrations/${VERSION}_schema_update
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
else
echo "No schema changes detected"
exit 0
fi
- name: Verify Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create test database
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
# Apply all migrations in order to verify
for migration in deploy/migrations/*/migration.sql; do
echo "Applying migration: $migration"
psql "${SHADOW_DATABASE_URL}" -f $migration
done
# Add this step before create-pull-request to debug permissions
- name: Check Token Permissions
run: |
echo "Checking token permissions..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm/collaborators
echo "\nChecking if token can create PRs..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm
# Add this debug step before git push
- name: Debug Changed Files
run: |
echo "Files staged for commit:"
git diff --name-status --staged
echo "\nAll changed files:"
git status
- name: Create Pull Request
if: success()
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: "chore: update prisma migrations"
title: "Update Prisma Migrations"
body: |
Auto-generated migration based on schema.prisma changes.
Generated files:
- deploy/migrations/${VERSION}_schema_update/migration.sql
- deploy/migrations/${VERSION}_schema_update/README.md
branch: feat/prisma-migration-${{ env.VERSION }}
base: main
delete-branch: true
- name: Generate and Save Migrations
run: |
# Only add migration files
git add deploy/migrations/
git status # Debug what's being committed
git commit -m "chore: update prisma migrations"

53
.github/workflows/test-linting.yml vendored Normal file
View file

@ -0,0 +1,53 @@
name: LiteLLM Linting
on:
pull_request:
branches: [ main ]
jobs:
lint:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev
- name: Run Black formatting check
run: |
cd litellm
poetry run black . --check
cd ..
- name: Run Ruff linting
run: |
cd litellm
poetry run ruff check .
cd ..
- name: Run MyPy type checking
run: |
cd litellm
poetry run mypy . --ignore-missing-imports
cd ..
- name: Check for circular imports
run: |
cd litellm
poetry run python ../tests/documentation_tests/test_circular_imports.py
cd ..
- name: Check import safety
run: |
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)

35
.github/workflows/test-litellm.yml vendored Normal file
View file

@ -0,0 +1,35 @@
name: LiteLLM Mock Tests (folder - tests/litellm)
on:
pull_request:
branches: [ main ]
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev,proxy-dev --extras proxy
poetry run pip install pytest-xdist
- name: Run tests
run: |
poetry run pytest tests/litellm -x -vv -n 4

1
.gitignore vendored
View file

@ -83,4 +83,5 @@ tests/llm_translation/test_vertex_key.json
litellm/proxy/migrations/0_init/migration.sql litellm/proxy/migrations/0_init/migration.sql
litellm/proxy/db/migrations/0_init/migration.sql litellm/proxy/db/migrations/0_init/migration.sql
litellm/proxy/db/migrations/* litellm/proxy/db/migrations/*
litellm/proxy/migrations/*config.yaml
litellm/proxy/migrations/* litellm/proxy/migrations/*

View file

@ -6,44 +6,35 @@ repos:
entry: pyright entry: pyright
language: system language: system
types: [python] types: [python]
files: ^litellm/ files: ^(litellm/|litellm_proxy_extras/)
- id: isort - id: isort
name: isort name: isort
entry: isort entry: isort
language: system language: system
types: [python] types: [python]
files: litellm/.*\.py files: (litellm/|litellm_proxy_extras/).*\.py
exclude: ^litellm/__init__.py$ exclude: ^litellm/__init__.py$
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black - id: black
name: black
entry: poetry run black
language: system
types: [python]
files: (litellm/|litellm_proxy_extras/).*\.py
- repo: https://github.com/pycqa/flake8 - repo: https://github.com/pycqa/flake8
rev: 7.0.0 # The version of flake8 to use rev: 7.0.0 # The version of flake8 to use
hooks: hooks:
- id: flake8 - id: flake8
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/ exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
additional_dependencies: [flake8-print] additional_dependencies: [flake8-print]
files: litellm/.*\.py files: (litellm/|litellm_proxy_extras/).*\.py
# - id: flake8
# name: flake8 (router.py function length)
# files: ^litellm/router\.py$
# args: [--max-function-length=40]
# # additional_dependencies: [flake8-functions]
- repo: https://github.com/python-poetry/poetry - repo: https://github.com/python-poetry/poetry
rev: 1.8.0 rev: 1.8.0
hooks: hooks:
- id: poetry-check - id: poetry-check
files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
- repo: local - repo: local
hooks: hooks:
- id: check-files-match - id: check-files-match
name: Check if files match name: Check if files match
entry: python3 ci_cd/check_files_match.py entry: python3 ci_cd/check_files_match.py
language: system language: system
# - id: check-file-length
# name: Check file length
# entry: python check_file_length.py
# args: ["10000"] # set your desired maximum number of lines
# language: python
# files: litellm/.*\.py
# exclude: ^litellm/tests/

View file

@ -14,6 +14,9 @@ help:
install-dev: install-dev:
poetry install --with dev poetry install --with dev
install-proxy-dev:
poetry install --with dev,proxy-dev
lint: install-dev lint: install-dev
poetry run pip install types-requests types-setuptools types-redis types-PyYAML poetry run pip install types-requests types-setuptools types-redis types-PyYAML
cd litellm && poetry run mypy . --ignore-missing-imports cd litellm && poetry run mypy . --ignore-missing-imports

View file

@ -16,9 +16,6 @@
<a href="https://pypi.org/project/litellm/" target="_blank"> <a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version"> <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
</a> </a>
<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
<img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
</a>
<a href="https://www.ycombinator.com/companies/berriai"> <a href="https://www.ycombinator.com/companies/berriai">
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23"> <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
</a> </a>

60
ci_cd/baseline_db.py Normal file
View file

@ -0,0 +1,60 @@
import subprocess
from pathlib import Path
from datetime import datetime
def create_baseline():
"""Create baseline migration in deploy/migrations"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
deploy_dir = root_dir / "deploy"
migrations_dir = deploy_dir / "migrations"
schema_path = root_dir / "schema.prisma"
# Create migrations directory
migrations_dir.mkdir(parents=True, exist_ok=True)
# Create migration_lock.toml if it doesn't exist
lock_file = migrations_dir / "migration_lock.toml"
if not lock_file.exists():
lock_file.write_text('provider = "postgresql"\n')
# Create timestamp-based migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_dir = migrations_dir / f"{timestamp}_baseline"
migration_dir.mkdir(parents=True, exist_ok=True)
# Generate migration SQL
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-empty",
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created baseline migration in {migration_dir}")
return True
except subprocess.CalledProcessError as e:
print(f"Error running prisma command: {e.stderr}")
return False
except Exception as e:
print(f"Error creating baseline migration: {str(e)}")
return False
if __name__ == "__main__":
create_baseline()
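If it helps, the helper above can also be wrapped in a one-line CI entry point (a hypothetical wrapper; the import path assumes `ci_cd` is importable from the repo root):

```python
# Hypothetical CI wrapper: fail the job when the baseline migration
# could not be generated by create_baseline() above.
from ci_cd.baseline_db import create_baseline

if __name__ == "__main__":
    raise SystemExit(0 if create_baseline() else 1)
```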

View file

@ -0,0 +1,19 @@
#!/bin/bash
# Exit on error
set -e
echo "🚀 Building and publishing litellm-proxy-extras"
# Navigate to litellm-proxy-extras directory
cd "$(dirname "$0")/../litellm-proxy-extras"
# Build the package
echo "📦 Building package..."
poetry build
# Publish to PyPI
echo "🌎 Publishing to PyPI..."
poetry publish
echo "✅ Done! Package published successfully"

95
ci_cd/run_migration.py Normal file
View file

@ -0,0 +1,95 @@
import os
import subprocess
from pathlib import Path
from datetime import datetime
import testing.postgresql
import shutil
def create_migration(migration_name: str = None):
"""
Create a new migration SQL file in the migrations directory by comparing
current database state with schema
Args:
migration_name (str): Name for the migration
"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
schema_path = root_dir / "schema.prisma"
# Create temporary PostgreSQL database
with testing.postgresql.Postgresql() as postgresql:
db_url = postgresql.url()
# Create temporary migrations directory next to schema.prisma
temp_migrations_dir = schema_path.parent / "migrations"
try:
# Copy existing migrations to temp directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
shutil.copytree(migrations_dir, temp_migrations_dir)
# Apply existing migrations to temp database
os.environ["DATABASE_URL"] = db_url
subprocess.run(
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
check=True,
)
# Generate diff between current database and schema
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-url",
db_url,
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
if result.stdout.strip():
# Generate timestamp and create migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_name = migration_name or "unnamed_migration"
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
migration_dir.mkdir(parents=True, exist_ok=True)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created migration in {migration_dir}")
return True
else:
print("No schema changes detected. Migration not needed.")
return False
finally:
# Clean up: remove temporary migrations directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
except subprocess.CalledProcessError as e:
print(f"Error generating migration: {e.stderr}")
return False
except Exception as e:
print(f"Error creating migration: {str(e)}")
return False
if __name__ == "__main__":
# If running directly, can optionally pass migration name as argument
import sys
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
create_migration(migration_name)

View file

@ -1,5 +1,35 @@
version: "3.11" version: "3.11"
services: services:
litellm:
build:
context: .
args:
target: runtime
image: ghcr.io/berriai/litellm:main-stable
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml <<- this is missing in the docker-compose file currently
# command:
# - "--config=/app/config.yaml"
##############################################
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
- .env # Load local .env file
depends_on:
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
healthcheck: # Defines the health check configuration for the container
test: [ "CMD", "curl", "-f", "http://localhost:4000/health/liveliness || exit 1" ] # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
db: db:
image: postgres:16 image: postgres:16
restart: always restart: always
@ -16,3 +46,23 @@ services:
interval: 1s interval: 1s
timeout: 5s timeout: 5s
retries: 10 retries: 10
prometheus:
image: prom/prometheus
volumes:
- prometheus_data:/prometheus
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=15d'
restart: always
volumes:
prometheus_data:
driver: local
postgres_data:
name: litellm_postgres_data # Named volume for Postgres data persistence
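A minimal readiness probe against the proxy service defined above, assuming the default `4000:4000` port mapping (a sketch, not part of the compose file):

```python
# Sketch: poll the proxy's liveliness endpoint until it responds, or time out.
import time
import urllib.request

def wait_for_proxy(url: str = "http://localhost:4000/health/liveliness", timeout: int = 120) -> bool:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass
        time.sleep(3)
    return False

if __name__ == "__main__":
    print("proxy ready" if wait_for_proxy() else "proxy did not become ready")
```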

View file

@ -4,21 +4,177 @@ import Image from '@theme/IdealImage';
# /mcp [BETA] - Model Context Protocol # /mcp [BETA] - Model Context Protocol
Use Model Context Protocol with LiteLLM ## Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your `mcp_servers` with LiteLLM and all your clients can list and call available tools.
<Image <Image
img={require('../img/litellm_mcp.png')} img={require('../img/mcp_2.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}} style={{width: '100%', display: 'block', margin: '2rem auto'}}
/> />
<p style={{textAlign: 'left', color: '#666'}}> <p style={{textAlign: 'left', color: '#666'}}>
LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
</p> </p>
#### How it works
## Overview LiteLLM exposes the following MCP endpoints:
LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP - `/mcp/tools/list` - List all available tools
- `/mcp/tools/call` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes MCP tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate MCP server
7. LiteLLM returns the tool call results to the MCP client
#### Usage
#### 1. Define your tools under `mcp_servers` in your config.yaml file.
LiteLLM allows you to define your tools on the `mcp_servers` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml title="config.yaml" showLineNumbers
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}
```
#### 2. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
```shell title="Docker Run" showLineNumbers
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell title="litellm pip" showLineNumbers
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 3. Make an LLM API request
In this example we will do the following:
1. Use MCP client to list MCP tools on LiteLLM Proxy
2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
3. Provide the MCP tools to `gpt-4o`
4. Handle tool call from `gpt-4o`
5. Convert OpenAI tool call to MCP tool call
6. Execute tool call on MCP server
```python title="MCP Client List Tools" showLineNumbers
import asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
transform_mcp_tool_to_openai_tool,
transform_openai_tool_call_request_to_mcp_tool_call_request,
)
async def main():
# Initialize clients
# point OpenAI client to LiteLLM Proxy
client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
# Point MCP client to LiteLLM Proxy
async with sse_client("http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
# 1. List MCP tools on LiteLLM Proxy
mcp_tools = await session.list_tools()
print("List of MCP tools for MCP server:", mcp_tools.tools)
# Create message
messages = [
ChatCompletionUserMessageParam(
content="Send an email about LiteLLM supporting MCP", role="user"
)
]
# 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
# Since OpenAI only supports tools in the OpenAI format, we need to convert the MCP tools to the OpenAI format.
openai_tools = [
transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
]
# 3. Provide the MCP tools to `gpt-4o`
response = await client.chat.completions.create(
model="gpt-4o",
messages=messages,
tools=openai_tools,
tool_choice="auto",
)
# 4. Handle tool call from `gpt-4o`
if response.choices[0].message.tool_calls:
tool_call = response.choices[0].message.tool_calls[0]
if tool_call:
# 5. Convert OpenAI tool call to MCP tool call
# Since MCP servers expect tools in the MCP format, we need to convert the OpenAI tool call to the MCP format.
# This is done using litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request
mcp_call = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=tool_call.model_dump()
)
)
# 6. Execute tool call on MCP server
result = await session.call_tool(
name=mcp_call.name, arguments=mcp_call.arguments
)
print("Result:", result)
# Run it
asyncio.run(main())
```
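The example above stops after executing the tool. If you want the model to produce a final answer from the tool output, a possible continuation (not part of the original example) is sketched below; it reuses the `client`, `messages`, `tool_call`, and `result` objects from the script, plus `response.choices[0].message` as the assistant turn:

```python
# Sketch: feed the MCP tool result back to the model so it can answer the user.
# The arguments are the objects produced in the example above.
import json
from openai import AsyncOpenAI

async def answer_with_tool_result(client: AsyncOpenAI, messages, assistant_message, tool_call, result) -> str:
    followup = messages + [
        assistant_message.model_dump(),  # the assistant turn that requested the tool call
        {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result.model_dump(), default=str),
        },
    ]
    final = await client.chat.completions.create(model="gpt-4o", messages=followup)
    return final.choices[0].message.content
```

Inside the session block this could be called as `print(await answer_with_tool_result(client, messages, response.choices[0].message, tool_call, result))`.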
## LiteLLM Python SDK MCP Bridge
LiteLLM Python SDK acts as an MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
- **List** Available MCP Tools: OpenAI clients can view all available MCP tools - **List** Available MCP Tools: OpenAI clients can view all available MCP tools
- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools - `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
@ -26,8 +182,6 @@ LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported mod
- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server - `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server
## Usage
### 1. List Available MCP Tools ### 1. List Available MCP Tools
In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways: In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
@ -271,215 +425,3 @@ async with stdio_client(server_params) as (read, write):
</TabItem> </TabItem>
</Tabs> </Tabs>
## Upcoming Features
:::info
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
:::
### Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools.
#### How it works
LiteLLM exposes the following MCP endpoints:
- `/mcp/list_tools` - List all available tools
- `/mcp/call_tool` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate handlers
7. LiteLLM returns the tool call results to the MCP client
#### Usage
#### 1. Define your tools on mcp_tools
LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_tools:
- name: "get_current_time"
description: "Get the current time"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short"]
}
}
}
handler: "mcp_tools.get_current_time"
```
#### 2. Define a handler for your tool
Create a new file called `mcp_tools.py` and add this code. The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called.
```python
# mcp_tools.py
from datetime import datetime
def get_current_time(format: str = "short"):
"""
Simple handler for the 'get_current_time' tool.
Args:
format (str): The format of the time to return ('short').
Returns:
str: The current time formatted as 'HH:MM'.
"""
# Get the current time
current_time = datetime.now()
# Format the time as 'HH:MM'
return current_time.strftime('%H:%M')
```
#### 3. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
Mount your `mcp_tools.py` on the LiteLLM Docker container.
```shell
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
-v $(pwd)/mcp_tools.py:/app/mcp_tools.py \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 4. Make an LLM API request
```python
import asyncio
from langchain_mcp_adapters.tools import load_mcp_tools
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from mcp import ClientSession
from mcp.client.sse import sse_client
async def main():
# Initialize the model with your API key
model = ChatOpenAI(model="gpt-4o")
# Connect to the MCP server
async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
# Initialize the session
print("Initializing session...")
await session.initialize()
print("Session initialized")
# Load available tools from MCP
print("Loading tools...")
tools = await load_mcp_tools(session)
print(f"Loaded {len(tools)} tools")
# Create a ReAct agent with the model and tools
agent = create_react_agent(model, tools)
# Run the agent with a user query
user_query = "What's the weather in Tokyo?"
print(f"Asking: {user_query}")
agent_response = await agent.ainvoke({"messages": user_query})
print("Agent response:")
print(agent_response)
if __name__ == "__main__":
asyncio.run(main())
```
### Specification for `mcp_tools`
The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients.
#### Tool Definition Format
```yaml
mcp_tools:
- name: string # Required: Name of the tool
description: string # Required: Description of what the tool does
input_schema: object # Required: JSON Schema defining the tool's input parameters
handler: string # Required: Path to the function that implements the tool
```
#### Field Details
- `name`: A unique identifier for the tool
- `description`: A clear description of what the tool does, used by LLMs to determine when to call it
- `input_schema`: JSON Schema object defining the expected input parameters
- `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name")
#### Example Tool Definition
```yaml
mcp_tools:
- name: "get_current_time"
description: "Get the current time in a specified format"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short", "long", "iso"]
},
"timezone": {
"type": "string",
"description": "The timezone to use (e.g., 'UTC', 'America/New_York')",
"default": "UTC"
}
},
"required": ["format"]
}
handler: "mcp_tools.get_current_time"
```

View file

@ -664,6 +664,58 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem> </TabItem>
</Tabs> </Tabs>
## Usage - Latency Optimized Inference
Valid from v1.65.1+
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
messages=[{"role": "user", "content": "What is the capital of France?"}],
performanceConfig={"latency": "optimized"},
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: bedrock-claude-3-7
litellm_params:
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
performanceConfig: {"latency": "optimized"} # 👈 EITHER HERE OR ON REQUEST
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "bedrock-claude-3-7",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"performanceConfig": {"latency": "optimized"} # 👈 EITHER HERE OR ON CONFIG.YAML
}'
```
</TabItem>
</Tabs>
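The same proxy request can also be issued from Python with the OpenAI SDK. Forwarding `performanceConfig` through `extra_body` is an assumption here, based on how other provider-specific params are passed to the proxy:

```python
# Sketch: call the LiteLLM proxy and forward the Bedrock performanceConfig.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="bedrock-claude-3-7",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    extra_body={"performanceConfig": {"latency": "optimized"}},  # assumed passthrough
)
print(response.choices[0].message.content)
```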
## Usage - Bedrock Guardrails ## Usage - Bedrock Guardrails
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html) Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
@ -1776,6 +1828,7 @@ response = completion(
) )
``` ```
</TabItem> </TabItem>
<TabItem value="proxy" label="PROXY"> <TabItem value="proxy" label="PROXY">
1. Setup config.yaml 1. Setup config.yaml
@ -1820,11 +1873,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
``` ```
</TabItem> </TabItem>
</Tabs> </Tabs>
### SSO Login (AWS Profile) ### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable - Set `AWS_PROFILE` environment variable
- Make bedrock completion call - Make bedrock completion call
```python ```python
import os import os
from litellm import completion from litellm import completion
@ -1917,12 +1972,46 @@ model_list:
</Tabs> </Tabs>
Text to Image:
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter"
}'
```
Color Guided Generation:
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter",
"taskType": "COLOR_GUIDED_GENERATION",
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
}'
```
| Model Name | Function Call |
|-------------------------|---------------------------------------------|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
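For the SDK route, a minimal Nova Canvas text-to-image call would look roughly like this (a sketch; it assumes AWS credentials and region are already set in the environment, and that the extra Nova Canvas params shown in the curl examples pass through as kwargs):

```python
# Sketch: Amazon Nova Canvas image generation via the LiteLLM SDK.
from litellm import image_generation

response = image_generation(
    model="bedrock/amazon.nova-canvas-v1:0",
    prompt="A cute baby sea otter",
    # taskType="COLOR_GUIDED_GENERATION",                   # assumed kwarg passthrough
    # colorGuidedGenerationParams={"colors": ["#FFFFFF"]},  # assumed kwarg passthrough
)
print(response.data[0])  # contains the generated image (b64 or url)
```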
### Passing an external BedrockRuntime.Client as a parameter - Completion() ### Passing an external BedrockRuntime.Client as a parameter - Completion()
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
:::warning :::warning
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
Experimental - 2024-Jun-23: Experimental - 2024-Jun-23:

View file

@ -589,8 +589,10 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the audio."}, {"type": "text", "text": "Please summarize the audio."},
{ {
"type": "image_url", "type": "file",
"image_url": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA "file": {
"file_data": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
}
}, },
], ],
} }
@ -640,8 +642,11 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the file."}, {"type": "text", "text": "Please summarize the file."},
{ {
"type": "image_url", "type": "file",
"image_url": "https://storage..." # 👈 SET THE IMG URL "file": {
"file_id": "https://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
}, },
], ],
} }
@ -668,8 +673,11 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the file."}, {"type": "text", "text": "Please summarize the file."},
{ {
"type": "image_url", "type": "file",
"image_url": "gs://..." # 👈 SET THE cloud storage bucket url "file": {
"file_id": "gs://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
}, },
], ],
} }

View file

@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` | | fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
## OpenAI Audio Transcription
LiteLLM supports OpenAI Audio Transcription endpoint.
Supported models:
| Model Name | Function Call |
|---------------------------|-----------------------------------------------------------------|
| `whisper-1` | `response = completion(model="whisper-1", file=audio_file)` |
| `gpt-4o-transcribe` | `response = completion(model="gpt-4o-transcribe", file=audio_file)` |
| `gpt-4o-mini-transcribe` | `response = completion(model="gpt-4o-mini-transcribe", file=audio_file)` |
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import transcription
import os
# set api keys
os.environ["OPENAI_API_KEY"] = ""
audio_file = open("/path/to/audio.mp3", "rb")
response = transcription(model="gpt-4o-transcribe", file=audio_file)
print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gpt-4o-transcribe
litellm_params:
model: gpt-4o-transcribe
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: audio_transcription
general_settings:
master_key: sk-1234
```
2. Start the proxy
```bash
litellm --config config.yaml
```
3. Test it!
```bash
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
--form 'model="gpt-4o-transcribe"'
```
</TabItem>
</Tabs>
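The same proxy request can be made with the OpenAI Python SDK pointed at LiteLLM (a sketch; it assumes the `gpt-4o-transcribe` model name from the config.yaml above and the same host/port as the curl example):

```python
# Sketch: audio transcription through the LiteLLM proxy using the OpenAI SDK.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000")

with open("/path/to/audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )
print(transcript.text)
```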
## Advanced ## Advanced
### Getting OpenAI API Response Headers ### Getting OpenAI API Response Headers

View file

@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs> </Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
You can call fine-tuned Vertex AI Gemini models through LiteLLM
| Property | Details |
|----------|---------|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
```python title="Model parameter for calling fine-tuned gemini models"
model="vertex_ai/gemini/<your-finetuned-model>"
```
<Tabs>
<TabItem value="sdk" label="LiteLLM Python SDK">
```python showLineNumbers title="Example"
import litellm
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = litellm.completion(
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy">
1. Add Vertex Credentials to your env
```bash title="Authenticate to Vertex AI"
gcloud auth application-default login
```
2. Setup config.yaml
```yaml showLineNumbers title="Add to litellm config"
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/gemini/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
```
3. Test it!
<Tabs>
<TabItem value="openai" label="OpenAI Python SDK">
```python showLineNumbers title="Example request"
from openai import OpenAI
client = OpenAI(
api_key="your-litellm-key",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="finetuned-gemini",
messages=[
{"role": "user", "content": "hi"}
]
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```bash showLineNumbers title="Example request"
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Model Garden ## Model Garden
:::tip :::tip
@ -1479,67 +1576,6 @@ response = completion(
</Tabs> </Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
Fine tuned models on vertex have a numerical model/endpoint id.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = completion(
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Add Vertex Credentials to your env
```bash
!gcloud auth application-default login
```
2. Setup config.yaml
```yaml
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
model_info:
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
```
3. Test it!
```bash
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
## Gemini Pro Vision ## Gemini Pro Vision
| Model Name | Function Call | | Model Name | Function Call |
@ -1684,23 +1720,25 @@ assert isinstance(
``` ```
## Usage - PDF / Videos / etc. Files ## Usage - PDF / Videos / Audio etc. Files
Pass any file supported by Vertex AI, through LiteLLM. Pass any file supported by Vertex AI, through LiteLLM.
LiteLLM Supports the following image types passed in url LiteLLM Supports the following file types passed in url.
Using the `file` message type for VertexAI is live from v1.65.1+
``` ```
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg Files with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg Files with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4 Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
Base64 Encoded Local Images Base64 Encoded Local Files
``` ```
<Tabs> <Tabs>
<TabItem value="sdk" label="SDK"> <TabItem value="sdk" label="SDK">
### **Using `gs://`** ### **Using `gs://` or any URL**
```python ```python
from litellm import completion from litellm import completion
@ -1712,8 +1750,11 @@ response = completion(
"content": [ "content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."}, {"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{ {
"type": "image_url", "type": "file",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF "file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL - specify mime-type
}
}, },
], ],
} }
@ -1747,8 +1788,16 @@ response = completion(
"content": [ "content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."}, {"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{ {
"type": "image_url", "type": "file",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF "file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
}
}, },
], ],
} }
@ -1794,8 +1843,11 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document" "text": "You are a very professional document summarization specialist. Please summarize the given document"
}, },
{ {
"type": "image_url", "type": "file",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF "file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL
}
} }
} }
] ]
@ -1822,10 +1874,17 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document" "text": "You are a very professional document summarization specialist. Please summarize the given document"
}, },
{ {
"type": "image_url", "type": "file",
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF "file": {
} "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
},
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
} }
},
] ]
} }
], ],
@ -1836,6 +1895,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem> </TabItem>
</Tabs> </Tabs>
## Chat Models ## Chat Models
| Model Name | Function Call | | Model Name | Function Call |
|------------------|--------------------------------------| |------------------|--------------------------------------|
@ -2044,7 +2104,12 @@ print(response)
## **Multi-Modal Embeddings** ## **Multi-Modal Embeddings**
Usage
Known Limitations:
- Only supports 1 image / video / image per request
- Only supports GCS or base64 encoded images / videos
### Usage
<Tabs> <Tabs>
<TabItem value="sdk" label="SDK"> <TabItem value="sdk" label="SDK">
@ -2260,6 +2325,115 @@ print(f"Text Embedding: {embeddings.text_embedding}")
</Tabs> </Tabs>
### Text + Image + Video Embeddings
<Tabs>
<TabItem value="sdk" label="SDK">
Text + Image
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"] # will be sent as a gcs image
)
```
Text + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
Image + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM PROXY (Unified Endpoint)">
1. Add model to config.yaml
```yaml
model_list:
- model_name: multimodalembedding@001
litellm_params:
model: vertex_ai/multimodalembedding@001
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
litellm_settings:
drop_params: True
```
2. Start Proxy
```
$ litellm --config /path/to/config.yaml
```
3. Make Request use OpenAI Python SDK, Langchain Python SDK
Text + Image
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"],
)
print(response)
```
Text + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
Image + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
</TabItem>
</Tabs>
## **Image Generation Models** ## **Image Generation Models**
Usage Usage

View file

@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
- Login: `<your-proxy-base-url>/sso/key/generate` - Login: `<your-proxy-base-url>/sso/key/generate`
- Logout: `<your-proxy-base-url>` - Logout: `<your-proxy-base-url>`
Here's the env var to set the logout url on the proxy
```bash
PROXY_LOGOUT_URL="https://www.google.com"
```
#### Step 3. Set `PROXY_BASE_URL` in your .env #### Step 3. Set `PROXY_BASE_URL` in your .env
Set this in your .env (so the proxy can set the correct redirect url) Set this in your .env (so the proxy can set the correct redirect url)

View file

@ -160,7 +160,7 @@ general_settings:
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) | | database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) | | database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) | | database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key | | allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) | | custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
| max_parallel_requests | integer | The max parallel requests allowed per deployment | | max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall | | global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
@ -479,7 +479,7 @@ router_settings:
| PROXY_ADMIN_ID | Admin identifier for proxy server | PROXY_ADMIN_ID | Admin identifier for proxy server
| PROXY_BASE_URL | Base URL for proxy service | PROXY_BASE_URL | Base URL for proxy service
| PROXY_LOGOUT_URL | URL for logging out of the proxy service | PROXY_LOGOUT_URL | URL for logging out of the proxy service
| PROXY_MASTER_KEY | Master key for proxy authentication | LITELLM_MASTER_KEY | Master key for proxy authentication
| QDRANT_API_BASE | Base URL for Qdrant API | QDRANT_API_BASE | Base URL for Qdrant API
| QDRANT_API_KEY | API key for Qdrant service | QDRANT_API_KEY | API key for Qdrant service
| QDRANT_URL | Connection URL for Qdrant database | QDRANT_URL | Connection URL for Qdrant database
@ -515,4 +515,5 @@ router_settings:
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse | UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication | UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption | USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
| USE_PRISMA_MIGRATE | Flag to use prisma migrate instead of prisma db push. Recommended for production environments.
| WEBHOOK_URL | URL for receiving webhooks from external services | WEBHOOK_URL | URL for receiving webhooks from external services

View file

@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability ## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability. When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
**WARNING: Only do this if you're running LiteLLM on VPC, that cannot be accessed from the public internet.** **WARNING: Only do this if you're running LiteLLM on VPC, that cannot be accessed from the public internet.**
```yaml #### Configuration
```yaml showLineNumbers title="litellm config.yaml"
general_settings: general_settings:
allow_requests_on_db_unavailable: True allow_requests_on_db_unavailable: True
``` ```
#### Expected Behavior
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM handles errors as follows (a quick verification sketch follows the table):
| Type of Error | Expected Behavior | Details |
|---------------|-------------------|----------------|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
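To sanity-check this behavior in your own deployment, you can poll the readiness endpoint while the database is down. A minimal sketch, assuming the proxy is reachable at `http://localhost:4000` (adjust the base URL for your setup):

```python
import httpx

# Assumed local proxy URL - replace with your deployment's base URL
PROXY_BASE_URL = "http://localhost:4000"

# With allow_requests_on_db_unavailable: True, this should return 200 OK
# even while the database is unreachable.
resp = httpx.get(f"{PROXY_BASE_URL}/health/readiness", timeout=10)
print(resp.status_code, resp.text)
```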
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI ## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
By default, LiteLLM writes several types of logs to the database: By default, LiteLLM writes several types of logs to the database:
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK # INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK # INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
``` ```
### Machine Specifications to Deploy LiteLLM
| Service | Spec | CPUs | Memory | Architecture | Version |
| --- | --- | --- | --- | --- | --- |
| Server | `t2.small` | `1vCPUs` | `8GB` | `x86` | - |
| Redis Cache | - | - | - | - | 7.0+ Redis Engine |
### Reference Kubernetes Deployment YAML
Reference Kubernetes `deployment.yaml` that was load tested by us
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: litellm-deployment
spec:
replicas: 3
selector:
matchLabels:
app: litellm
template:
metadata:
labels:
app: litellm
spec:
containers:
- name: litellm-container
image: ghcr.io/berriai/litellm:main-latest
imagePullPolicy: Always
env:
- name: AZURE_API_KEY
value: "d6******"
- name: AZURE_API_BASE
value: "https://ope******"
- name: LITELLM_MASTER_KEY
value: "sk-1234"
- name: DATABASE_URL
value: "po**********"
args:
- "--config"
- "/app/proxy_config.yaml" # Update the path to mount the config file
volumeMounts: # Define volume mount for proxy_config.yaml
- name: config-volume
mountPath: /app
readOnly: true
livenessProbe:
httpGet:
path: /health/liveliness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
readinessProbe:
httpGet:
path: /health/readiness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
volumes: # Define volume to mount proxy_config.yaml
- name: config-volume
configMap:
name: litellm-config
```
Reference Kubernetes `service.yaml` that was load tested by us
```yaml
apiVersion: v1
kind: Service
metadata:
name: litellm-service
spec:
selector:
app: litellm
ports:
- protocol: TCP
port: 4000
targetPort: 4000
type: LoadBalancer
```

View file

@ -188,7 +188,13 @@ Currently implemented for:
- OpenAI (if OPENAI_API_KEY is set) - OpenAI (if OPENAI_API_KEY is set)
- Fireworks AI (if FIREWORKS_AI_API_KEY is set) - Fireworks AI (if FIREWORKS_AI_API_KEY is set)
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set) - LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
- Gemini (if GEMINI_API_KEY is set)
- XAI (if XAI_API_KEY is set)
- Anthropic (if ANTHROPIC_API_KEY is set)
You can also specify a custom provider to check:
**All providers**:
```python ```python
from litellm import get_valid_models from litellm import get_valid_models
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
print(valid_models) print(valid_models)
``` ```
**Specific provider**:
```python
from litellm import get_valid_models
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
print(valid_models)
```
### `validate_environment(model: str)` ### `validate_environment(model: str)`
This helper tells you if you have all the required environment variables for a model, and if not - what's missing. This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
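A minimal sketch of how this can be called (the exact keys in the returned dict may vary by litellm version):

```python
from litellm import validate_environment

# Check whether the required env vars for this model are set
env_check = validate_environment(model="gpt-3.5-turbo")
print(env_check)  # e.g. {'keys_in_environment': False, 'missing_keys': ['OPENAI_API_KEY']}
```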

View file

@ -98,6 +98,5 @@ On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
<Image img={require('../../img/litellm_thinking_openweb.gif')} /> <Image img={require('../../img/litellm_thinking_openweb.gif')} />
## Additional Resources
- Running LiteLLM and OpenWebUI on Windows Localhost: A Comprehensive Guide [https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/](https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/)


View file

@ -24,6 +24,7 @@ This release brings:
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles) - LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
- Perf improvements for Usage-based Routing - Perf improvements for Usage-based Routing
- Streaming guardrail support via websockets - Streaming guardrail support via websockets
- Azure OpenAI client perf fix (from previous release)
## Docker Run LiteLLM Proxy ## Docker Run LiteLLM Proxy
@ -31,7 +32,7 @@ This release brings:
docker run docker run
-e STORE_MODEL_IN_DB=True -e STORE_MODEL_IN_DB=True
-p 4000:4000 -p 4000:4000
ghcr.io/berriai/litellm:main-v1.63.14-stable ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
``` ```
## Demo Instance ## Demo Instance

View file

@ -0,0 +1,160 @@
---
title: v1.65.0-stable - Model Context Protocol
slug: v1.65.0-stable
date: 2025-03-30T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [mcp, custom_prompt_management]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0-stable is live now. Here are the key highlights of this release:
- **MCP Support**: Support for adding and using MCP servers on the LiteLLM proxy.
- **UI view total usage after 1M+ logs**: You can now view usage analytics after crossing 1M+ logs in DB.
## Model Context Protocol (MCP)
This release introduces support for centrally adding MCP servers on LiteLLM. This allows you to add MCP server endpoints and your developers can `list` and `call` MCP tools through LiteLLM.
Read more about MCP [here](https://docs.litellm.ai/docs/mcp).
<Image
img={require('../../img/release_notes/mcp_ui.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
Expose and use MCP servers through LiteLLM
</p>
## UI view total usage after 1M+ logs
This release brings the ability to view total usage analytics even after exceeding 1M+ logs in your database. We've implemented a scalable architecture that stores only aggregate usage data, resulting in significantly more efficient queries and reduced database CPU utilization.
<Image
img={require('../../img/release_notes/ui_usage.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
View total usage after 1M+ logs
</p>
- How this works:
  - We now aggregate usage data into a dedicated DailyUserSpend table, significantly reducing query load and CPU usage even beyond 1M+ logs.
- Daily Spend Breakdown API:
  - Retrieve granular daily usage data (by model, provider, and API key) with a single endpoint.
Example Request:
```shell title="Daily Spend Breakdown API" showLineNumbers
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
-H 'Authorization: Bearer sk-...'
```
```json title="Daily Spend Breakdown API Response" showLineNumbers
{
"results": [
{
"date": "2025-03-27",
"metrics": {
"spend": 0.0177072,
"prompt_tokens": 111,
"completion_tokens": 1711,
"total_tokens": 1822,
"api_requests": 11
},
"breakdown": {
"models": {
"gpt-4o-mini": {
"spend": 1.095e-05,
"prompt_tokens": 37,
"completion_tokens": 9,
"total_tokens": 46,
"api_requests": 1
}
},
"providers": { "openai": { ... }, "azure_ai": { ... } },
"api_keys": { "3126b6eaf1...": { ... } }
}
}
],
"metadata": {
"total_spend": 0.7274667,
"total_prompt_tokens": 280990,
"total_completion_tokens": 376674,
"total_api_requests": 14
}
}
```
## New Models / Updated Models
- Support for Vertex AI gemini-2.0-flash-lite & Google AI Studio gemini-2.0-flash-lite [PR](https://github.com/BerriAI/litellm/pull/9523)
- Support for Vertex AI Fine-Tuned LLMs [PR](https://github.com/BerriAI/litellm/pull/9542)
- Nova Canvas image generation support [PR](https://github.com/BerriAI/litellm/pull/9525)
- OpenAI gpt-4o-transcribe support [PR](https://github.com/BerriAI/litellm/pull/9517)
- Added new Vertex AI text embedding model [PR](https://github.com/BerriAI/litellm/pull/9476)
## LLM Translation
- OpenAI Web Search Tool Call Support [PR](https://github.com/BerriAI/litellm/pull/9465)
- Vertex AI topLogprobs support [PR](https://github.com/BerriAI/litellm/pull/9518)
- Support for sending images and video to Vertex AI multimodal embedding [Doc](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings)
- Support litellm.api_base for Vertex AI + Gemini across completion, embedding, image_generation [PR](https://github.com/BerriAI/litellm/pull/9516)
- Bug fix for returning `response_cost` when using litellm python SDK with LiteLLM Proxy [PR](https://github.com/BerriAI/litellm/commit/6fd18651d129d606182ff4b980e95768fc43ca3d)
- Support for `max_completion_tokens` on Mistral API [PR](https://github.com/BerriAI/litellm/pull/9606)
- Refactored Vertex AI passthrough routes - fixes unpredictable behaviour with auto-setting default_vertex_region on router model add [PR](https://github.com/BerriAI/litellm/pull/9467)
## Spend Tracking Improvements
- Log 'api_base' on spend logs [PR](https://github.com/BerriAI/litellm/pull/9509)
- Support for Gemini audio token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
- Fixed OpenAI audio input token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
## UI
### Model Management
- Allowed team admins to add/update/delete models on UI [PR](https://github.com/BerriAI/litellm/pull/9572)
- Added rendering of `supports_web_search` on the model hub [PR](https://github.com/BerriAI/litellm/pull/9469)
### Request Logs
- Show API base and model ID on request logs [PR](https://github.com/BerriAI/litellm/pull/9572)
- Allow viewing key info on request logs [PR](https://github.com/BerriAI/litellm/pull/9568)
### Usage Tab
- Added Daily User Spend Aggregate view - allows the UI Usage tab to work with > 1M rows [PR](https://github.com/BerriAI/litellm/pull/9538)
- Connected UI to "LiteLLM_DailyUserSpend" spend table [PR](https://github.com/BerriAI/litellm/pull/9603)
## Logging Integrations
- Fixed StandardLoggingPayload for GCS Pub Sub Logging Integration [PR](https://github.com/BerriAI/litellm/pull/9508)
- Track `litellm_model_name` on `StandardLoggingPayload` [Docs](https://docs.litellm.ai/docs/proxy/logging_spec#standardlogginghiddenparams)
## Performance / Reliability Improvements
- LiteLLM Redis semantic caching implementation [PR](https://github.com/BerriAI/litellm/pull/9356)
- Gracefully handle exceptions when DB is having an outage [PR](https://github.com/BerriAI/litellm/pull/9533)
- Allow Pods to startup + passing /health/readiness when allow_requests_on_db_unavailable: True and DB is down [PR](https://github.com/BerriAI/litellm/pull/9569)
## General Improvements
- Support for exposing MCP tools on litellm proxy [PR](https://github.com/BerriAI/litellm/pull/9426)
- Support discovering Gemini, Anthropic, xAI models by calling their /v1/model endpoint [PR](https://github.com/BerriAI/litellm/pull/9530)
- Fixed route check for non-proxy admins on JWT auth [PR](https://github.com/BerriAI/litellm/pull/9454)
- Added baseline Prisma database migrations [PR](https://github.com/BerriAI/litellm/pull/9565)
- View all wildcard models on /model/info [PR](https://github.com/BerriAI/litellm/pull/9572)
## Security
- Bumped next from 14.2.21 to 14.2.25 in UI dashboard [PR](https://github.com/BerriAI/litellm/pull/9458)
## Complete Git Diff
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.14-stable.patch1...v1.65.0-stable)

View file

@ -0,0 +1,34 @@
---
title: v1.65.0 - Team Model Add - update
slug: v1.65.0
date: 2025-03-28T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [management endpoints, team models, ui]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
This means that only proxy admins or team admins can create team models.
## Additional Changes
- Allows team admins to call `/model/update` to update team models.
- Allows team admins to call `/model/delete` to delete team models.
- Introduces a new `user_models_only` param on `/v2/model/info` - returns only models added by this user (see the sketch below).
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
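For illustration, here is a hedged sketch of querying the new param against a locally running proxy (assumes the proxy is at `http://localhost:4000`, a virtual key of `sk-1234`, and that `user_models_only` is passed as a query parameter):

```python
import httpx

# Assumed local proxy URL and virtual key - adjust for your deployment
BASE_URL = "http://localhost:4000"
API_KEY = "sk-1234"

# Ask /v2/model/info to return only models added by the calling user
resp = httpx.get(
    f"{BASE_URL}/v2/model/info",
    params={"user_models_only": "true"},
    headers={"Authorization": f"Bearer {API_KEY}"},
)
print(resp.json())
```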
<Image img={require('../../img/release_notes/team_model_add.png')} />

View file

@ -304,7 +304,6 @@ const sidebars = {
"image_variations", "image_variations",
] ]
}, },
"mcp",
{ {
type: "category", type: "category",
label: "/audio", label: "/audio",

View file

@ -444,9 +444,7 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
detected_secrets = [] detected_secrets = []
for file in secrets.files: for file in secrets.files:
for found_secret in secrets[file]: for found_secret in secrets[file]:
if found_secret.secret_value is None: if found_secret.secret_value is None:
continue continue
detected_secrets.append( detected_secrets.append(
@ -471,14 +469,12 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return
if "messages" in data and isinstance(data["messages"], list): if "messages" in data and isinstance(data["messages"], list):
for message in data["messages"]: for message in data["messages"]:
if "content" in message and isinstance(message["content"], str): if "content" in message and isinstance(message["content"], str):
detected_secrets = self.scan_message_for_secrets(message["content"]) detected_secrets = self.scan_message_for_secrets(message["content"])
for secret in detected_secrets: for secret in detected_secrets:

View file

@ -0,0 +1,26 @@
Portions of this software are licensed as follows:
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
---
MIT License
Copyright (c) 2023 Berri AI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,21 @@
Additional files for the proxy. Reduces the size of the main litellm package.
Currently, only stores the migration.sql files for litellm-proxy.
To install, run:
```bash
pip install litellm-proxy-extras
```
OR
```bash
pip install litellm[proxy] # installs litellm-proxy-extras and other proxy dependencies.
```
To use the migrations, run:
```bash
litellm --use_prisma_migrate
```


View file

@ -0,0 +1,12 @@
import logging
# Set up package logger
logger = logging.getLogger("litellm_proxy_extras")
if not logger.handlers: # Only add handler if none exists
handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

View file

@ -0,0 +1,360 @@
-- CreateTable
CREATE TABLE "LiteLLM_BudgetTable" (
"budget_id" TEXT NOT NULL,
"max_budget" DOUBLE PRECISION,
"soft_budget" DOUBLE PRECISION,
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"model_max_budget" JSONB,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_CredentialsTable" (
"credential_id" TEXT NOT NULL,
"credential_name" TEXT NOT NULL,
"credential_values" JSONB NOT NULL,
"credential_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ProxyModelTable" (
"model_id" TEXT NOT NULL,
"model_name" TEXT NOT NULL,
"litellm_params" JSONB NOT NULL,
"model_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationTable" (
"organization_id" TEXT NOT NULL,
"organization_alias" TEXT NOT NULL,
"budget_id" TEXT NOT NULL,
"metadata" JSONB NOT NULL DEFAULT '{}',
"models" TEXT[],
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ModelTable" (
"id" SERIAL NOT NULL,
"aliases" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamTable" (
"team_id" TEXT NOT NULL,
"team_alias" TEXT,
"organization_id" TEXT,
"admins" TEXT[],
"members" TEXT[],
"members_with_roles" JSONB NOT NULL DEFAULT '{}',
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"models" TEXT[],
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"blocked" BOOLEAN NOT NULL DEFAULT false,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"model_id" INTEGER,
CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserTable" (
"user_id" TEXT NOT NULL,
"user_alias" TEXT,
"team_id" TEXT,
"sso_user_id" TEXT,
"organization_id" TEXT,
"password" TEXT,
"teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
"user_role" TEXT,
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"user_email" TEXT,
"models" TEXT[],
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_VerificationToken" (
"token" TEXT NOT NULL,
"key_name" TEXT,
"key_alias" TEXT,
"soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"expires" TIMESTAMP(3),
"models" TEXT[],
"aliases" JSONB NOT NULL DEFAULT '{}',
"config" JSONB NOT NULL DEFAULT '{}',
"user_id" TEXT,
"team_id" TEXT,
"permissions" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"metadata" JSONB NOT NULL DEFAULT '{}',
"blocked" BOOLEAN,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"max_budget" DOUBLE PRECISION,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"budget_id" TEXT,
"organization_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT,
CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
);
-- CreateTable
CREATE TABLE "LiteLLM_EndUserTable" (
"user_id" TEXT NOT NULL,
"alias" TEXT,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"allowed_model_region" TEXT,
"default_model" TEXT,
"budget_id" TEXT,
"blocked" BOOLEAN NOT NULL DEFAULT false,
CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_Config" (
"param_name" TEXT NOT NULL,
"param_value" JSONB,
CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
);
-- CreateTable
CREATE TABLE "LiteLLM_SpendLogs" (
"request_id" TEXT NOT NULL,
"call_type" TEXT NOT NULL,
"api_key" TEXT NOT NULL DEFAULT '',
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"total_tokens" INTEGER NOT NULL DEFAULT 0,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"completionStartTime" TIMESTAMP(3),
"model" TEXT NOT NULL DEFAULT '',
"model_id" TEXT DEFAULT '',
"model_group" TEXT DEFAULT '',
"custom_llm_provider" TEXT DEFAULT '',
"api_base" TEXT DEFAULT '',
"user" TEXT DEFAULT '',
"metadata" JSONB DEFAULT '{}',
"cache_hit" TEXT DEFAULT '',
"cache_key" TEXT DEFAULT '',
"request_tags" JSONB DEFAULT '[]',
"team_id" TEXT,
"end_user" TEXT,
"requester_ip_address" TEXT,
"messages" JSONB DEFAULT '{}',
"response" JSONB DEFAULT '{}',
CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ErrorLogs" (
"request_id" TEXT NOT NULL,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"api_base" TEXT NOT NULL DEFAULT '',
"model_group" TEXT NOT NULL DEFAULT '',
"litellm_model_name" TEXT NOT NULL DEFAULT '',
"model_id" TEXT NOT NULL DEFAULT '',
"request_kwargs" JSONB NOT NULL DEFAULT '{}',
"exception_type" TEXT NOT NULL DEFAULT '',
"exception_string" TEXT NOT NULL DEFAULT '',
"status_code" TEXT NOT NULL DEFAULT '',
CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserNotifications" (
"request_id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"models" TEXT[],
"justification" TEXT NOT NULL,
"status" TEXT NOT NULL,
CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamMembership" (
"user_id" TEXT NOT NULL,
"team_id" TEXT NOT NULL,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"budget_id" TEXT,
CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationMembership" (
"user_id" TEXT NOT NULL,
"organization_id" TEXT NOT NULL,
"user_role" TEXT,
"spend" DOUBLE PRECISION DEFAULT 0.0,
"budget_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_InvitationLink" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"is_accepted" BOOLEAN NOT NULL DEFAULT false,
"accepted_at" TIMESTAMP(3),
"expires_at" TIMESTAMP(3) NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_AuditLog" (
"id" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"changed_by" TEXT NOT NULL DEFAULT '',
"changed_by_api_key" TEXT NOT NULL DEFAULT '',
"action" TEXT NOT NULL,
"table_name" TEXT NOT NULL,
"object_id" TEXT NOT NULL,
"before_value" JSONB,
"updated_values" JSONB,
CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

View file

@ -0,0 +1,33 @@
-- CreateTable
CREATE TABLE "LiteLLM_DailyUserSpend" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"date" TEXT NOT NULL,
"api_key" TEXT NOT NULL,
"model" TEXT NOT NULL,
"model_group" TEXT,
"custom_llm_provider" TEXT,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");

View file

@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;

View file

@ -0,0 +1,14 @@
-- CreateEnum
CREATE TYPE "JobStatus" AS ENUM ('ACTIVE', 'INACTIVE');
-- CreateTable
CREATE TABLE "LiteLLM_CronJob" (
"cronjob_id" TEXT NOT NULL,
"pod_id" TEXT NOT NULL,
"status" "JobStatus" NOT NULL DEFAULT 'INACTIVE',
"last_updated" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"ttl" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_CronJob_pkey" PRIMARY KEY ("cronjob_id")
);

View file

@ -0,0 +1 @@
provider = "postgresql"

View file

@ -0,0 +1,80 @@
import os
import random
import subprocess
import time
from typing import Optional
from litellm_proxy_extras._logging import logger
def str_to_bool(value: Optional[str]) -> bool:
if value is None:
return False
return value.lower() in ("true", "1", "t", "y", "yes")
class ProxyExtrasDBManager:
@staticmethod
def setup_database(schema_path: str, use_migrate: bool = False) -> bool:
"""
Set up the database using either prisma migrate or prisma db push
Uses migrations from litellm-proxy-extras package
Args:
schema_path (str): Path to the Prisma schema file
use_migrate (bool): Whether to use prisma migrate instead of db push
Returns:
bool: True if setup was successful, False otherwise
"""
use_migrate = str_to_bool(os.getenv("USE_PRISMA_MIGRATE")) or use_migrate
for attempt in range(4):
original_dir = os.getcwd()
schema_dir = os.path.dirname(schema_path)
os.chdir(schema_dir)
try:
if use_migrate:
logger.info("Running prisma migrate deploy")
try:
# Set migrations directory for Prisma
subprocess.run(
["prisma", "migrate", "deploy"],
timeout=60,
check=True,
capture_output=True,
text=True,
)
logger.info("prisma migrate deploy completed")
return True
except subprocess.CalledProcessError as e:
logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
if (
"P3005" in e.stderr
and "database schema is not empty" in e.stderr
):
logger.info("Error: Database schema is not empty")
return False
else:
# Use prisma db push with increased timeout
subprocess.run(
["prisma", "db", "push", "--accept-data-loss"],
timeout=60,
check=True,
)
return True
except subprocess.TimeoutExpired:
logger.info(f"Attempt {attempt + 1} timed out")
time.sleep(random.randrange(5, 15))
except subprocess.CalledProcessError as e:
attempts_left = 3 - attempt
retry_msg = (
f" Retrying... ({attempts_left} attempts left)"
if attempts_left > 0
else ""
)
logger.info(f"The process failed to execute. Details: {e}.{retry_msg}")
time.sleep(random.randrange(5, 15))
finally:
os.chdir(original_dir)
return False

View file

@ -0,0 +1,30 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.1.1"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
[tool.poetry.urls]
homepage = "https://litellm.ai"
Homepage = "https://litellm.ai"
repository = "https://github.com/BerriAI/litellm"
Repository = "https://github.com/BerriAI/litellm"
documentation = "https://docs.litellm.ai"
Documentation = "https://docs.litellm.ai"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0, !=3.9.7"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.1.1"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",
"../pyproject.toml:litellm-proxy-extras = {version = \""
]

View file

View file

@ -2,7 +2,7 @@
import warnings import warnings
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*") warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
### INIT VARIABLES ########## ### INIT VARIABLES ###########
import threading import threading
import os import os
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
@ -122,19 +122,19 @@ langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False prometheus_initialize_budget_metrics: Optional[bool] = False
argilla_batch_size: Optional[int] = None argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = ( gcs_pub_sub_use_v1: Optional[
False # if you want to use v1 gcs pubsub logged payload bool
) ] = False # if you want to use v1 gcs pubsub logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Union[str, Callable, CustomLogger]] = ( _async_input_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable, CustomLogger]] = ( _async_success_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = ( _async_failure_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = [] pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = [] post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False turn_off_message_logging: Optional[bool] = False
@ -142,18 +142,18 @@ log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[bool] = ( add_user_information_to_llm_headers: Optional[
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers bool
) ] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks ############# ### end of callbacks #############
email: Optional[str] = ( email: Optional[
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 str
) ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[str] = ( token: Optional[
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 str
) ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True telemetry = True
max_tokens = 256 # OpenAI Defaults max_tokens = 256 # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False)) drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
@ -229,24 +229,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = ( enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k' False # feature-flag for caching on optional params - e.g. 'top_k'
) )
caching: bool = ( caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
) cache: Optional[
caching_with_models: bool = ( Cache
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 ] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
cache: Optional[Cache] = (
None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
default_in_memory_ttl: Optional[float] = None default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {} model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {} model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0 # set the max budget across all providers max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = ( budget_duration: Optional[
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). str
) ] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = ( default_soft_budget: float = (
50.0 # by default all litellm proxy keys have a soft budget of 50.0 50.0 # by default all litellm proxy keys have a soft budget of 50.0
) )
@ -255,15 +251,11 @@ forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0 # private variable, used if max budget is set _current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {} error_logs: Dict = {}
add_function_to_prompt: bool = ( add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
False # if function calling not supported by api, append function call details to system prompt
)
client_session: Optional[httpx.Client] = None client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks' model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = ( model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
)
suppress_debug_info = False suppress_debug_info = False
dynamodb_table_name: Optional[str] = None dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None s3_callback_params: Optional[Dict] = None
@ -285,9 +277,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = [] custom_prometheus_metadata_labels: List[str] = []
#### REQUEST PRIORITIZATION #### #### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None priority_reservation: Optional[Dict[str, float]] = None
force_ipv4: bool = ( force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
module_level_aclient = AsyncHTTPHandler( module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient" timeout=request_timeout, client_alias="module level aclient"
) )
@ -301,13 +291,13 @@ fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3 allowed_fails: int = 3
num_retries_per_request: Optional[int] = ( num_retries_per_request: Optional[
None # for the request overall (incl. fallbacks + model retries) int
) ] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS ##################### ####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = ( secret_manager_client: Optional[
None # list of instantiated key management clients - e.g. azure kv, infisical, etc. Any
) ] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None _google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None _key_management_system: Optional[KeyManagementSystem] = None
_key_management_settings: KeyManagementSettings = KeyManagementSettings() _key_management_settings: KeyManagementSettings = KeyManagementSettings()
@ -813,6 +803,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
from .llms.maritalk import MaritalkConfig from .llms.maritalk import MaritalkConfig
from .llms.openrouter.chat.transformation import OpenrouterConfig from .llms.openrouter.chat.transformation import OpenrouterConfig
from .llms.anthropic.chat.transformation import AnthropicConfig from .llms.anthropic.chat.transformation import AnthropicConfig
from .llms.anthropic.common_utils import AnthropicModelInfo
from .llms.groq.stt.transformation import GroqSTTConfig from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion.transformation import AnthropicTextConfig from .llms.anthropic.completion.transformation import AnthropicTextConfig
from .llms.triton.completion.transformation import TritonConfig from .llms.triton.completion.transformation import TritonConfig
@ -848,6 +839,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig, VertexGeminiConfig,
VertexGeminiConfig as VertexAIConfig, VertexGeminiConfig as VertexAIConfig,
) )
from .llms.gemini.common_utils import GeminiModelInfo
from .llms.gemini.chat.transformation import ( from .llms.gemini.chat.transformation import (
GoogleAIStudioGeminiConfig, GoogleAIStudioGeminiConfig,
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
@ -950,6 +942,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
from .llms.openai.chat.gpt_transformation import ( from .llms.openai.chat.gpt_transformation import (
OpenAIGPTConfig, OpenAIGPTConfig,
) )
from .llms.openai.transcriptions.whisper_transformation import (
OpenAIWhisperAudioTranscriptionConfig,
)
from .llms.openai.transcriptions.gpt_transformation import (
OpenAIGPTAudioTranscriptionConfig,
)
openAIGPTConfig = OpenAIGPTConfig() openAIGPTConfig = OpenAIGPTConfig()
from .llms.openai.chat.gpt_audio_transformation import ( from .llms.openai.chat.gpt_audio_transformation import (
@ -978,6 +976,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
from .llms.friendliai.chat.transformation import FriendliaiChatConfig from .llms.friendliai.chat.transformation import FriendliaiChatConfig
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
from .llms.xai.chat.transformation import XAIChatConfig from .llms.xai.chat.transformation import XAIChatConfig
from .llms.xai.common_utils import XAIModelInfo
from .llms.volcengine import VolcEngineConfig from .llms.volcengine import VolcEngineConfig
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
from .llms.azure.azure import ( from .llms.azure.azure import (
@ -1047,10 +1046,10 @@ from .types.llms.custom_llm import CustomLLMItem
from .types.utils import GenericStreamingChunk from .types.utils import GenericStreamingChunk
custom_provider_map: List[CustomLLMItem] = [] custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = ( _custom_providers: List[
[] str
) # internal helper util, used to track names of custom providers ] = [] # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[bool] = ( disable_hf_tokenizer_download: Optional[
None # disable huggingface tokenizer download. Defaults to openai clk100 bool
) ] = None # disable huggingface tokenizer download. Defaults to openai clk100
global_disable_no_log_param: bool = False global_disable_no_log_param: bool = False

View file

@ -1,6 +1,7 @@
import json import json
import logging import logging
import os import os
import sys
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter
@ -40,9 +41,56 @@ class JsonFormatter(Formatter):
return json.dumps(json_record) return json.dumps(json_record)
# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
# Create a handler with JSON formatting for exceptions
error_handler = logging.StreamHandler()
error_handler.setFormatter(formatter)
# Setup excepthook for uncaught exceptions
def json_excepthook(exc_type, exc_value, exc_traceback):
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exc_value),
args=(),
exc_info=(exc_type, exc_value, exc_traceback),
)
error_handler.handle(record)
sys.excepthook = json_excepthook
# Configure asyncio exception handler if possible
try:
import asyncio
def async_json_exception_handler(loop, context):
exception = context.get("exception")
if exception:
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exception),
args=(),
exc_info=None,
)
error_handler.handle(record)
else:
loop.default_exception_handler(context)
asyncio.get_event_loop().set_exception_handler(async_json_exception_handler)
except Exception:
pass
# Create a formatter and set it for the handler # Create a formatter and set it for the handler
if json_logs: if json_logs:
handler.setFormatter(JsonFormatter()) handler.setFormatter(JsonFormatter())
_setup_json_exception_handlers(JsonFormatter())
else: else:
formatter = logging.Formatter( formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s", "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
@ -65,18 +113,24 @@ def _turn_on_json():
handler = logging.StreamHandler() handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter()) handler.setFormatter(JsonFormatter())
# Define a list of the loggers to update # Define all loggers to update, including root logger
loggers = [verbose_router_logger, verbose_proxy_logger, verbose_logger] loggers = [logging.getLogger()] + [
verbose_router_logger,
verbose_proxy_logger,
verbose_logger,
]
# Iterate through each logger and update its handlers # Iterate through each logger and update its handlers
for logger in loggers: for logger in loggers:
# Remove all existing handlers # Remove all existing handlers
for h in logger.handlers[:]: for h in logger.handlers[:]:
logger.removeHandler(h) logger.removeHandler(h)
# Add the new handler # Add the new handler
logger.addHandler(handler) logger.addHandler(handler)
# Set up exception handlers
_setup_json_exception_handlers(JsonFormatter())
def _turn_on_debug(): def _turn_on_debug():
verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug
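A minimal, self-contained sketch of the pattern `_setup_json_exception_handlers` applies above, assuming a formatter similar to `JsonFormatter`: uncaught exceptions are routed through a JSON-formatting handler instead of the default traceback printer.

import logging
import sys

# Hypothetical stand-in for the JsonFormatter shown above.
formatter = logging.Formatter('{"level": "%(levelname)s", "message": "%(message)s"}')

error_handler = logging.StreamHandler()
error_handler.setFormatter(formatter)

def json_excepthook(exc_type, exc_value, exc_traceback):
    # Wrap the uncaught exception in a LogRecord so it is emitted through the JSON handler.
    record = logging.LogRecord(
        name="LiteLLM",
        level=logging.ERROR,
        pathname="",
        lineno=0,
        msg=str(exc_value),
        args=(),
        exc_info=(exc_type, exc_value, exc_traceback),
    )
    error_handler.handle(record)

sys.excepthook = json_excepthook

raise RuntimeError("boom")  # now reported via the JSON handler instead of the bare default traceback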


@ -202,6 +202,7 @@ def init_redis_cluster(redis_kwargs) -> redis.RedisCluster:
def _init_redis_sentinel(redis_kwargs) -> redis.Redis: def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
sentinel_nodes = redis_kwargs.get("sentinel_nodes") sentinel_nodes = redis_kwargs.get("sentinel_nodes")
sentinel_password = redis_kwargs.get("sentinel_password")
service_name = redis_kwargs.get("service_name") service_name = redis_kwargs.get("service_name")
if not sentinel_nodes or not service_name: if not sentinel_nodes or not service_name:
@ -212,7 +213,11 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.") verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")
# Set up the Sentinel client # Set up the Sentinel client
sentinel = redis.Sentinel(sentinel_nodes, socket_timeout=0.1) sentinel = redis.Sentinel(
sentinel_nodes,
socket_timeout=0.1,
password=sentinel_password,
)
# Return the master instance for the given service # Return the master instance for the given service
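For context, a hedged usage sketch of the new `sentinel_password` path (host names and service name are hypothetical): redis-py accepts `password` on the `Sentinel` constructor as a default connection option for the clients it hands back.

import redis

sentinel = redis.Sentinel(
    [("sentinel-1.example.internal", 26379), ("sentinel-2.example.internal", 26379)],  # assumed nodes
    socket_timeout=0.1,
    password="s3cr3t",  # forwarded as a default connection option, per the change above
)
master = sentinel.master_for("mymaster", socket_timeout=0.1)  # service_name from redis_kwargs
master.ping()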


@ -15,7 +15,7 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
OTELClass = OpenTelemetry OTELClass = OpenTelemetry
else: else:
Span = Any Span = Any
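The `Span = Union[_Span, Any]` pattern (repeated in several files below) keeps opentelemetry an optional import: under a static type checker `Span` resolves to the union, while at runtime it is plain `Any`. A small sketch of the same guard, independent of this codebase:

from typing import TYPE_CHECKING, Any, Optional, Union

if TYPE_CHECKING:
    # Only evaluated by type checkers; opentelemetry need not be installed at runtime.
    from opentelemetry.trace import Span as _Span

    Span = Union[_Span, Any]
else:
    Span = Any

def record_duration(duration: float, parent_otel_span: Optional[Span] = None) -> None:
    # Works whether or not an OpenTelemetry span is supplied.
    ...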


@ -153,7 +153,6 @@ def create_batch(
) )
api_base: Optional[str] = None api_base: Optional[str] = None
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -358,7 +357,6 @@ def retrieve_batch(
_is_async = kwargs.pop("aretrieve_batch", False) is True _is_async = kwargs.pop("aretrieve_batch", False) is True
api_base: Optional[str] = None api_base: Optional[str] = None
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base


@ -9,12 +9,12 @@ Has 4 methods:
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -66,9 +66,7 @@ class CachingHandlerResponse(BaseModel):
cached_result: Optional[Any] = None cached_result: Optional[Any] = None
final_embedding_cached_response: Optional[EmbeddingResponse] = None final_embedding_cached_response: Optional[EmbeddingResponse] = None
embedding_all_elements_cache_hit: bool = ( embedding_all_elements_cache_hit: bool = False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
)
class LLMCachingHandler: class LLMCachingHandler:
@ -738,7 +736,6 @@ class LLMCachingHandler:
if self._should_store_result_in_cache( if self._should_store_result_in_cache(
original_function=self.original_function, kwargs=new_kwargs original_function=self.original_function, kwargs=new_kwargs
): ):
litellm.cache.add_cache(result, **new_kwargs) litellm.cache.add_cache(result, **new_kwargs)
return return
@ -865,9 +862,9 @@ class LLMCachingHandler:
} }
if litellm.cache is not None: if litellm.cache is not None:
litellm_params["preset_cache_key"] = ( litellm_params[
litellm.cache._get_preset_cache_key_from_kwargs(**kwargs) "preset_cache_key"
) ] = litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
else: else:
litellm_params["preset_cache_key"] = None litellm_params["preset_cache_key"] = None


@ -1,12 +1,12 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional from typing import TYPE_CHECKING, Any, List, Optional, Union
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
@ -24,7 +24,7 @@ from .redis_cache import RedisCache
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -8,7 +8,6 @@ from .in_memory_cache import InMemoryCache
class LLMClientCache(InMemoryCache): class LLMClientCache(InMemoryCache):
def update_cache_key_with_event_loop(self, key): def update_cache_key_with_event_loop(self, key):
""" """
Add the event loop to the cache key, to prevent event loop closed errors. Add the event loop to the cache key, to prevent event loop closed errors.


@ -34,7 +34,7 @@ if TYPE_CHECKING:
cluster_pipeline = ClusterPipeline cluster_pipeline = ClusterPipeline
async_redis_client = Redis async_redis_client = Redis
async_redis_cluster_client = RedisCluster async_redis_cluster_client = RedisCluster
Span = _Span Span = Union[_Span, Any]
else: else:
pipeline = Any pipeline = Any
cluster_pipeline = Any cluster_pipeline = Any
@ -57,7 +57,6 @@ class RedisCache(BaseCache):
socket_timeout: Optional[float] = 5.0, # default 5 second timeout socket_timeout: Optional[float] = 5.0, # default 5 second timeout
**kwargs, **kwargs,
): ):
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging
from .._redis import get_redis_client, get_redis_connection_pool from .._redis import get_redis_client, get_redis_connection_pool
@ -1045,3 +1044,109 @@ class RedisCache(BaseCache):
except Exception as e: except Exception as e:
verbose_logger.debug(f"Redis TTL Error: {e}") verbose_logger.debug(f"Redis TTL Error: {e}")
return None return None
async def async_rpush(
self,
key: str,
values: List[Any],
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> int:
"""
Append one or multiple values to a list stored at key
Args:
key: The Redis key of the list
values: One or more values to append to the list
parent_otel_span: Optional parent OpenTelemetry span
Returns:
int: The length of the list after the push operation
"""
_redis_client: Any = self.init_async_client()
start_time = time.time()
try:
response = await _redis_client.rpush(key, *values)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_rpush",
)
)
return response
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_rpush",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache RPUSH: - Got exception from REDIS : {str(e)}"
)
raise e
async def async_lpop(
self,
key: str,
count: Optional[int] = None,
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> Union[Any, List[Any]]:
_redis_client: Any = self.init_async_client()
start_time = time.time()
print_verbose(f"LPOP from Redis list: key: {key}, count: {count}")
try:
result = await _redis_client.lpop(key, count)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_lpop",
)
)
# Handle result parsing if needed
if isinstance(result, bytes):
try:
return result.decode("utf-8")
except Exception:
return result
elif isinstance(result, list) and all(
isinstance(item, bytes) for item in result
):
try:
return [item.decode("utf-8") for item in result]
except Exception:
return result
return result
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_lpop",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache LPOP: - Got exception from REDIS : {str(e)}"
)
raise e
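A hedged usage sketch of the new list helpers; only `async_rpush` and `async_lpop` come from the code above, the cache constructor arguments and key name are assumptions. This is the shape of a simple buffered queue: producers append JSON payloads, a consumer drains up to N items at a time.

import asyncio
import json

from litellm.caching.redis_cache import RedisCache

async def main():
    cache = RedisCache(host="localhost", port=6379)  # assumed connection kwargs

    # Producer: append a spend-update payload to a Redis list.
    await cache.async_rpush(
        key="litellm_spend_update_buffer",
        values=[json.dumps({"key": "sk-123", "spend": 0.42})],
    )

    # Consumer: pop up to 100 buffered payloads in one call.
    items = await cache.async_lpop(key="litellm_spend_update_buffer", count=100)
    print(items)

asyncio.run(main())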


@ -5,7 +5,7 @@ Key differences:
- RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created - RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created
""" """
from typing import TYPE_CHECKING, Any, List, Optional from typing import TYPE_CHECKING, Any, List, Optional, Union
from litellm.caching.redis_cache import RedisCache from litellm.caching.redis_cache import RedisCache
@ -16,7 +16,7 @@ if TYPE_CHECKING:
pipeline = Pipeline pipeline = Pipeline
async_redis_client = Redis async_redis_client = Redis
Span = _Span Span = Union[_Span, Any]
else: else:
pipeline = Any pipeline = Any
async_redis_client = Any async_redis_client = Any


@ -13,11 +13,15 @@ import ast
import asyncio import asyncio
import json import json
import os import os
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple, cast
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_str_from_messages,
)
from litellm.types.utils import EmbeddingResponse
from .base_cache import BaseCache from .base_cache import BaseCache
@ -87,14 +91,16 @@ class RedisSemanticCache(BaseCache):
if redis_url is None: if redis_url is None:
try: try:
# Attempt to use provided parameters or fallback to environment variables # Attempt to use provided parameters or fallback to environment variables
host = host or os.environ['REDIS_HOST'] host = host or os.environ["REDIS_HOST"]
port = port or os.environ['REDIS_PORT'] port = port or os.environ["REDIS_PORT"]
password = password or os.environ['REDIS_PASSWORD'] password = password or os.environ["REDIS_PASSWORD"]
except KeyError as e: except KeyError as e:
# Raise a more informative exception if any of the required keys are missing # Raise a more informative exception if any of the required keys are missing
missing_var = e.args[0] missing_var = e.args[0]
raise ValueError(f"Missing required Redis configuration: {missing_var}. " raise ValueError(
f"Provide {missing_var} or redis_url.") from e f"Missing required Redis configuration: {missing_var}. "
f"Provide {missing_var} or redis_url."
) from e
redis_url = f"redis://:{password}@{host}:{port}" redis_url = f"redis://:{password}@{host}:{port}"
@ -137,10 +143,13 @@ class RedisSemanticCache(BaseCache):
List[float]: The embedding vector List[float]: The embedding vector
""" """
# Create an embedding from prompt # Create an embedding from prompt
embedding_response = litellm.embedding( embedding_response = cast(
EmbeddingResponse,
litellm.embedding(
model=self.embedding_model, model=self.embedding_model,
input=prompt, input=prompt,
cache={"no-store": True, "no-cache": True}, cache={"no-store": True, "no-cache": True},
),
) )
embedding = embedding_response["data"][0]["embedding"] embedding = embedding_response["data"][0]["embedding"]
return embedding return embedding
@ -186,6 +195,7 @@ class RedisSemanticCache(BaseCache):
""" """
print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}") print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
value_str: Optional[str] = None
try: try:
# Extract the prompt from messages # Extract the prompt from messages
messages = kwargs.get("messages", []) messages = kwargs.get("messages", [])
@ -203,7 +213,9 @@ class RedisSemanticCache(BaseCache):
else: else:
self.llmcache.store(prompt, value_str) self.llmcache.store(prompt, value_str)
except Exception as e: except Exception as e:
print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}") print_verbose(
f"Error setting {value_str or value} in the Redis semantic cache: {str(e)}"
)
def get_cache(self, key: str, **kwargs) -> Any: def get_cache(self, key: str, **kwargs) -> Any:
""" """
@ -336,13 +348,13 @@ class RedisSemanticCache(BaseCache):
prompt, prompt,
value_str, value_str,
vector=prompt_embedding, # Pass through custom embedding vector=prompt_embedding, # Pass through custom embedding
ttl=ttl ttl=ttl,
) )
else: else:
await self.llmcache.astore( await self.llmcache.astore(
prompt, prompt,
value_str, value_str,
vector=prompt_embedding # Pass through custom embedding vector=prompt_embedding, # Pass through custom embedding
) )
except Exception as e: except Exception as e:
print_verbose(f"Error in async_set_cache: {str(e)}") print_verbose(f"Error in async_set_cache: {str(e)}")
@ -374,14 +386,13 @@ class RedisSemanticCache(BaseCache):
prompt_embedding = await self._get_async_embedding(prompt, **kwargs) prompt_embedding = await self._get_async_embedding(prompt, **kwargs)
# Check the cache for semantically similar prompts # Check the cache for semantically similar prompts
results = await self.llmcache.acheck( results = await self.llmcache.acheck(prompt=prompt, vector=prompt_embedding)
prompt=prompt,
vector=prompt_embedding
)
# handle results / cache hit # handle results / cache hit
if not results: if not results:
kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 # TODO why here but not above?? kwargs.setdefault("metadata", {})[
"semantic-similarity"
] = 0.0 # TODO why here but not above??
return None return None
cache_hit = results[0] cache_hit = results[0]
@ -420,7 +431,9 @@ class RedisSemanticCache(BaseCache):
aindex = await self.llmcache._get_async_index() aindex = await self.llmcache._get_async_index()
return await aindex.info() return await aindex.info()
async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None: async def async_set_cache_pipeline(
self, cache_list: List[Tuple[str, Any]], **kwargs
) -> None:
""" """
Asynchronously store multiple values in the semantic cache. Asynchronously store multiple values in the semantic cache.
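The environment-variable fallback near the top of this file can be exercised in isolation; a minimal sketch (values are placeholders) of how `redis_url` is assembled when it is not passed explicitly:

import os

# Placeholder values; in the cache these come from the caller or the environment.
os.environ.setdefault("REDIS_HOST", "localhost")
os.environ.setdefault("REDIS_PORT", "6379")
os.environ.setdefault("REDIS_PASSWORD", "s3cr3t")

try:
    host = os.environ["REDIS_HOST"]
    port = os.environ["REDIS_PORT"]
    password = os.environ["REDIS_PASSWORD"]
except KeyError as e:
    missing_var = e.args[0]
    raise ValueError(
        f"Missing required Redis configuration: {missing_var}. "
        f"Provide {missing_var} or redis_url."
    ) from e

redis_url = f"redis://:{password}@{host}:{port}"
print(redis_url)  # redis://:s3cr3t@localhost:6379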


@ -123,7 +123,7 @@ class S3Cache(BaseCache):
) # Convert string to dictionary ) # Convert string to dictionary
except Exception: except Exception:
cached_response = ast.literal_eval(cached_response) cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict: if not isinstance(cached_response, dict):
cached_response = dict(cached_response) cached_response = dict(cached_response)
verbose_logger.debug( verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}" f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"


@ -4,9 +4,11 @@ ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512 DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5 DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2 DEFAULT_MAX_RETRIES = 2
DEFAULT_MAX_RECURSE_DEPTH = 10
DEFAULT_FAILURE_THRESHOLD_PERCENT = ( DEFAULT_FAILURE_THRESHOLD_PERCENT = (
0.5 # default cooldown a deployment if 50% of requests fail in a given minute 0.5 # default cooldown a deployment if 50% of requests fail in a given minute
) )
DEFAULT_MAX_TOKENS = 4096
DEFAULT_REDIS_SYNC_INTERVAL = 1 DEFAULT_REDIS_SYNC_INTERVAL = 1
DEFAULT_COOLDOWN_TIME_SECONDS = 5 DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5 DEFAULT_REPLICATE_POLLING_RETRIES = 5
@ -16,6 +18,8 @@ DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300 DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic. SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY #### #### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives. REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings #### #### Networking settings ####
@ -414,6 +418,7 @@ RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" # default tool name used when conv
########################### Logging Callback Constants ########################### ########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07" AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
MCP_TOOL_NAME_PREFIX = "mcp_tool"
########################### LiteLLM Proxy Specific Constants ########################### ########################### LiteLLM Proxy Specific Constants ###########################
######################################################################################## ########################################################################################
@ -441,3 +446,7 @@ HEALTH_CHECK_TIMEOUT_SECONDS = 60 # 60 seconds
UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard" UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
LITELLM_PROXY_ADMIN_NAME = "default_user_id" LITELLM_PROXY_ADMIN_NAME = "default_user_id"
########################### DB CRON JOB NAMES ###########################
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute
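As an illustration of how the new DB cron-job constants might be combined, a pod can take a short-lived lock before flushing the spend buffer. This is a generic Redis SET NX EX sketch, not the proxy's actual implementation; the lock key format and pod id are hypothetical.

import redis

DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60

r = redis.Redis(host="localhost", port=6379)  # assumed connection settings

# SET NX EX: only one pod holds the lock for the TTL window.
got_lock = r.set(
    name=f"cronjob_lock:{DB_SPEND_UPDATE_JOB_NAME}",  # hypothetical key format
    value="pod-1",                                    # hypothetical pod identifier
    nx=True,
    ex=DEFAULT_CRON_JOB_LOCK_TTL_SECONDS,
)
if got_lock:
    print("this pod runs the spend-update job")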


@ -2,7 +2,7 @@
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
from functools import lru_cache from functools import lru_cache
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union, cast
from pydantic import BaseModel from pydantic import BaseModel
@ -275,15 +275,13 @@ def cost_per_token( # noqa: PLR0915
custom_llm_provider=custom_llm_provider, custom_llm_provider=custom_llm_provider,
prompt_characters=prompt_characters, prompt_characters=prompt_characters,
completion_characters=completion_characters, completion_characters=completion_characters,
prompt_tokens=prompt_tokens, usage=usage_block,
completion_tokens=completion_tokens,
) )
elif cost_router == "cost_per_token": elif cost_router == "cost_per_token":
return google_cost_per_token( return google_cost_per_token(
model=model_without_prefix, model=model_without_prefix,
custom_llm_provider=custom_llm_provider, custom_llm_provider=custom_llm_provider,
prompt_tokens=prompt_tokens, usage=usage_block,
completion_tokens=completion_tokens,
) )
elif custom_llm_provider == "anthropic": elif custom_llm_provider == "anthropic":
return anthropic_cost_per_token(model=model, usage=usage_block) return anthropic_cost_per_token(model=model, usage=usage_block)
@ -464,13 +462,36 @@ def _model_contains_known_llm_provider(model: str) -> bool:
def _get_usage_object( def _get_usage_object(
completion_response: Any, completion_response: Any,
) -> Optional[Usage]: ) -> Optional[Usage]:
usage_obj: Optional[Usage] = None usage_obj = cast(
if completion_response is not None and isinstance( Union[Usage, ResponseAPIUsage, dict, BaseModel],
completion_response, ModelResponse (
): completion_response.get("usage")
usage_obj = completion_response.get("usage") if isinstance(completion_response, dict)
else getattr(completion_response, "get", lambda x: None)("usage")
),
)
if usage_obj is None:
return None
if isinstance(usage_obj, Usage):
return usage_obj return usage_obj
elif (
usage_obj is not None
and (isinstance(usage_obj, dict) or isinstance(usage_obj, ResponseAPIUsage))
and ResponseAPILoggingUtils._is_response_api_usage(usage_obj)
):
return ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage_obj
)
elif isinstance(usage_obj, dict):
return Usage(**usage_obj)
elif isinstance(usage_obj, BaseModel):
return Usage(**usage_obj.model_dump())
else:
verbose_logger.debug(
f"Unknown usage object type: {type(usage_obj)}, usage_obj: {usage_obj}"
)
return None
def _is_known_usage_objects(usage_obj): def _is_known_usage_objects(usage_obj):
@ -559,7 +580,6 @@ def completion_cost( # noqa: PLR0915
- For un-mapped Replicate models, the cost is calculated based on the total time used for the request. - For un-mapped Replicate models, the cost is calculated based on the total time used for the request.
""" """
try: try:
call_type = _infer_call_type(call_type, completion_response) or "completion" call_type = _infer_call_type(call_type, completion_response) or "completion"
if ( if (
@ -664,6 +684,7 @@ def completion_cost( # noqa: PLR0915
elif len(prompt) > 0: elif len(prompt) > 0:
prompt_tokens = token_counter(model=model, text=prompt) prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion) completion_tokens = token_counter(model=model, text=completion)
if model is None: if model is None:
raise ValueError( raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}" f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
@ -828,11 +849,14 @@ def get_response_cost_from_hidden_params(
_hidden_params_dict = hidden_params _hidden_params_dict = hidden_params
additional_headers = _hidden_params_dict.get("additional_headers", {}) additional_headers = _hidden_params_dict.get("additional_headers", {})
if additional_headers and "x-litellm-response-cost" in additional_headers: if (
response_cost = additional_headers["x-litellm-response-cost"] additional_headers
and "llm_provider-x-litellm-response-cost" in additional_headers
):
response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
if response_cost is None: if response_cost is None:
return None return None
return float(additional_headers["x-litellm-response-cost"]) return float(additional_headers["llm_provider-x-litellm-response-cost"])
return None return None
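A hedged sketch of reading the relocated cost header from a response's hidden params (the surrounding response shape is assumed; the header key comes from the change above):

from typing import Optional

def response_cost_from_hidden_params(hidden_params: dict) -> Optional[float]:
    # Mirrors the lookup above: the provider-forwarded header now carries the "llm_provider-" prefix.
    additional_headers = hidden_params.get("additional_headers", {}) or {}
    raw_cost = additional_headers.get("llm_provider-x-litellm-response-cost")
    return float(raw_cost) if raw_cost is not None else None

# Hypothetical hidden params as attached to a completion response.
print(response_cost_from_hidden_params(
    {"additional_headers": {"llm_provider-x-litellm-response-cost": "0.00042"}}
))  # 0.00042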


@ -1,5 +1,5 @@
import json import json
from typing import List, Literal, Union from typing import Dict, List, Literal, Union
from mcp import ClientSession from mcp import ClientSession
from mcp.types import CallToolRequestParams as MCPCallToolRequestParams from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
@ -76,8 +76,8 @@ def _get_function_arguments(function: FunctionDefinition) -> dict:
return arguments if isinstance(arguments, dict) else {} return arguments if isinstance(arguments, dict) else {}
def _transform_openai_tool_call_to_mcp_tool_call_request( def transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool: ChatCompletionMessageToolCall, openai_tool: Union[ChatCompletionMessageToolCall, Dict],
) -> MCPCallToolRequestParams: ) -> MCPCallToolRequestParams:
"""Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams.""" """Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams."""
function = openai_tool["function"] function = openai_tool["function"]
@ -100,9 +100,11 @@ async def call_openai_tool(
Returns: Returns:
The result of the MCP tool call. The result of the MCP tool call.
""" """
mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request( mcp_tool_call_request_params = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=openai_tool, openai_tool=openai_tool,
) )
)
return await call_mcp_tool( return await call_mcp_tool(
session=session, session=session,
call_tool_request_params=mcp_tool_call_request_params, call_tool_request_params=mcp_tool_call_request_params,
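A usage sketch for the renamed helper; the import path is assumed, since the file name is not shown in this view. With the widened signature it now also accepts a plain dict shaped like an OpenAI tool call.

# Assumed import location for the helper shown above.
from litellm.experimental_mcp_client.tools import (
    transform_openai_tool_call_request_to_mcp_tool_call_request,
)

openai_tool = {
    "id": "call_abc123",
    "type": "function",
    "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
}

mcp_params = transform_openai_tool_call_request_to_mcp_tool_call_request(
    openai_tool=openai_tool,
)
print(mcp_params.name, mcp_params.arguments)  # get_weather {'city': 'Paris'}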


@ -138,7 +138,6 @@ def create_fine_tuning_job(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -360,7 +359,6 @@ def cancel_fine_tuning_job(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -522,7 +520,6 @@ def list_fine_tuning_jobs(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base


@ -19,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:
return squashed return squashed


@ -195,13 +195,16 @@ class SlackAlerting(CustomBatchLogger):
if self.alerting is None or self.alert_types is None: if self.alerting is None or self.alert_types is None:
return return
time_difference_float, model, api_base, messages = ( (
self._response_taking_too_long_callback_helper( time_difference_float,
model,
api_base,
messages,
) = self._response_taking_too_long_callback_helper(
kwargs=kwargs, kwargs=kwargs,
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
) )
)
if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions: if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions:
messages = "Message not logged. litellm.redact_messages_in_exceptions=True" messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`" request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
@ -819,9 +822,9 @@ class SlackAlerting(CustomBatchLogger):
### UNIQUE CACHE KEY ### ### UNIQUE CACHE KEY ###
cache_key = provider + region_name cache_key = provider + region_name
outage_value: Optional[ProviderRegionOutageModel] = ( outage_value: Optional[
await self.internal_usage_cache.async_get_cache(key=cache_key) ProviderRegionOutageModel
) ] = await self.internal_usage_cache.async_get_cache(key=cache_key)
if ( if (
getattr(exception, "status_code", None) is None getattr(exception, "status_code", None) is None
@ -1402,9 +1405,9 @@ Model Info:
self.alert_to_webhook_url is not None self.alert_to_webhook_url is not None
and alert_type in self.alert_to_webhook_url and alert_type in self.alert_to_webhook_url
): ):
slack_webhook_url: Optional[Union[str, List[str]]] = ( slack_webhook_url: Optional[
self.alert_to_webhook_url[alert_type] Union[str, List[str]]
) ] = self.alert_to_webhook_url[alert_type]
elif self.default_webhook_url is not None: elif self.default_webhook_url is not None:
slack_webhook_url = self.default_webhook_url slack_webhook_url = self.default_webhook_url
else: else:
@ -1768,7 +1771,6 @@ Model Info:
- Team Created, Updated, Deleted - Team Created, Updated, Deleted
""" """
try: try:
message = f"`{event_name}`\n" message = f"`{event_name}`\n"
key_event_dict = key_event.model_dump() key_event_dict = key_event.model_dump()


@ -98,7 +98,6 @@ class ArgillaLogger(CustomBatchLogger):
argilla_dataset_name: Optional[str], argilla_dataset_name: Optional[str],
argilla_base_url: Optional[str], argilla_base_url: Optional[str],
) -> ArgillaCredentialsObject: ) -> ArgillaCredentialsObject:
_credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY") _credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY")
if _credentials_api_key is None: if _credentials_api_key is None:
raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.") raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.")


@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
@ -7,7 +7,7 @@ from litellm.types.utils import StandardLoggingPayload
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -19,14 +19,13 @@ if TYPE_CHECKING:
from litellm.types.integrations.arize import Protocol as _Protocol from litellm.types.integrations.arize import Protocol as _Protocol
Protocol = _Protocol Protocol = _Protocol
Span = _Span Span = Union[_Span, Any]
else: else:
Protocol = Any Protocol = Any
Span = Any Span = Any
class ArizeLogger(OpenTelemetry): class ArizeLogger(OpenTelemetry):
def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]): def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
ArizeLogger.set_arize_attributes(span, kwargs, response_obj) ArizeLogger.set_arize_attributes(span, kwargs, response_obj)
return return


@ -1,17 +1,20 @@
import os import os
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any, Union
from litellm.integrations.arize import _utils
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.arize import _utils
from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig
if TYPE_CHECKING: if TYPE_CHECKING:
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
from litellm.types.integrations.arize import Protocol as _Protocol
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
from litellm.types.integrations.arize import Protocol as _Protocol
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
Protocol = _Protocol Protocol = _Protocol
OpenTelemetryConfig = _OpenTelemetryConfig OpenTelemetryConfig = _OpenTelemetryConfig
Span = _Span Span = Union[_Span, Any]
else: else:
Protocol = Any Protocol = Any
OpenTelemetryConfig = Any OpenTelemetryConfig = Any
@ -20,6 +23,7 @@ else:
ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces" ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"
class ArizePhoenixLogger: class ArizePhoenixLogger:
@staticmethod @staticmethod
def set_arize_phoenix_attributes(span: Span, kwargs, response_obj): def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
@ -59,15 +63,14 @@ class ArizePhoenixLogger:
# a slightly different auth header format than self hosted phoenix # a slightly different auth header format than self hosted phoenix
if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT: if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
if api_key is None: if api_key is None:
raise ValueError("PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used.") raise ValueError(
"PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
)
otlp_auth_headers = f"api_key={api_key}" otlp_auth_headers = f"api_key={api_key}"
elif api_key is not None: elif api_key is not None:
# api_key/auth is optional for self hosted phoenix # api_key/auth is optional for self hosted phoenix
otlp_auth_headers = f"Authorization=Bearer {api_key}" otlp_auth_headers = f"Authorization=Bearer {api_key}"
return ArizePhoenixConfig( return ArizePhoenixConfig(
otlp_auth_headers=otlp_auth_headers, otlp_auth_headers=otlp_auth_headers, protocol=protocol, endpoint=endpoint
protocol=protocol,
endpoint=endpoint
) )
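In short, the validation above means: against the hosted Phoenix endpoint a PHOENIX_API_KEY is mandatory and is sent as `api_key=...`, while self-hosted deployments may omit it or use a Bearer header. A compact sketch of that branch (endpoint and key values are placeholders, the function is a standalone stand-in, not the logger method itself):

from typing import Optional

ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"

def build_otlp_auth_headers(endpoint: str, api_key: Optional[str]) -> Optional[str]:
    # Hosted Phoenix uses a different auth header format than self-hosted Phoenix.
    if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
        if api_key is None:
            raise ValueError(
                "PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
            )
        return f"api_key={api_key}"
    if api_key is not None:
        return f"Authorization=Bearer {api_key}"
    return None  # auth is optional for self-hosted Phoenix

print(build_otlp_auth_headers("http://localhost:6006/v1/traces", None))  # None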


@ -12,7 +12,10 @@ class AthinaLogger:
"athina-api-key": self.athina_api_key, "athina-api-key": self.athina_api_key,
"Content-Type": "application/json", "Content-Type": "application/json",
} }
self.athina_logging_url = os.getenv("ATHINA_BASE_URL", "https://log.athina.ai") + "/api/v1/log/inference" self.athina_logging_url = (
os.getenv("ATHINA_BASE_URL", "https://log.athina.ai")
+ "/api/v1/log/inference"
)
self.additional_keys = [ self.additional_keys = [
"environment", "environment",
"prompt_slug", "prompt_slug",

View file

@ -50,12 +50,12 @@ class AzureBlobStorageLogger(CustomBatchLogger):
self.azure_storage_file_system: str = _azure_storage_file_system self.azure_storage_file_system: str = _azure_storage_file_system
# Internal variables used for Token based authentication # Internal variables used for Token based authentication
self.azure_auth_token: Optional[str] = ( self.azure_auth_token: Optional[
None # the Azure AD token to use for Azure Storage API requests str
) ] = None # the Azure AD token to use for Azure Storage API requests
self.token_expiry: Optional[datetime] = ( self.token_expiry: Optional[
None # the expiry time of the currentAzure AD token datetime
) ] = None # the expiry time of the currentAzure AD token
asyncio.create_task(self.periodic_flush()) asyncio.create_task(self.periodic_flush())
self.flush_lock = asyncio.Lock() self.flush_lock = asyncio.Lock()
@ -153,7 +153,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
3. Flush the data 3. Flush the data
""" """
try: try:
if self.azure_storage_account_key: if self.azure_storage_account_key:
await self.upload_to_azure_data_lake_with_azure_account_key( await self.upload_to_azure_data_lake_with_azure_account_key(
payload=payload payload=payload


@ -4,7 +4,7 @@
import copy import copy
import os import os
from datetime import datetime from datetime import datetime
from typing import Optional, Dict from typing import Dict, Optional
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -19,7 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
) )
from litellm.utils import print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback) global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback
)
global_braintrust_sync_http_handler = HTTPHandler() global_braintrust_sync_http_handler = HTTPHandler()
API_BASE = "https://api.braintrustdata.com/v1" API_BASE = "https://api.braintrustdata.com/v1"
@ -35,7 +37,9 @@ def get_utc_datetime():
class BraintrustLogger(CustomLogger): class BraintrustLogger(CustomLogger):
def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None) -> None: def __init__(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> None:
super().__init__() super().__init__()
self.validate_environment(api_key=api_key) self.validate_environment(api_key=api_key)
self.api_base = api_base or API_BASE self.api_base = api_base or API_BASE
@ -45,7 +49,9 @@ class BraintrustLogger(CustomLogger):
"Authorization": "Bearer " + self.api_key, "Authorization": "Bearer " + self.api_key,
"Content-Type": "application/json", "Content-Type": "application/json",
} }
self._project_id_cache: Dict[str, str] = {} # Cache mapping project names to IDs self._project_id_cache: Dict[
str, str
] = {} # Cache mapping project names to IDs
def validate_environment(self, api_key: Optional[str]): def validate_environment(self, api_key: Optional[str]):
""" """
@ -71,7 +77,9 @@ class BraintrustLogger(CustomLogger):
try: try:
response = global_braintrust_sync_http_handler.post( response = global_braintrust_sync_http_handler.post(
f"{self.api_base}/project", headers=self.headers, json={"name": project_name} f"{self.api_base}/project",
headers=self.headers,
json={"name": project_name},
) )
project_dict = response.json() project_dict = response.json()
project_id = project_dict["id"] project_id = project_dict["id"]
@ -89,7 +97,9 @@ class BraintrustLogger(CustomLogger):
try: try:
response = await global_braintrust_http_handler.post( response = await global_braintrust_http_handler.post(
f"{self.api_base}/project/register", headers=self.headers, json={"name": project_name} f"{self.api_base}/project/register",
headers=self.headers,
json={"name": project_name},
) )
project_dict = response.json() project_dict = response.json()
project_id = project_dict["id"] project_id = project_dict["id"]
@ -116,15 +126,21 @@ class BraintrustLogger(CustomLogger):
if metadata is None: if metadata is None:
metadata = {} metadata = {}
proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) or {} proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers: for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("braintrust"): if metadata_param_key.startswith("braintrust"):
trace_param_key = metadata_param_key.replace("braintrust", "", 1) trace_param_key = metadata_param_key.replace("braintrust", "", 1)
if trace_param_key in metadata: if trace_param_key in metadata:
verbose_logger.warning(f"Overwriting Braintrust `{trace_param_key}` from request header") verbose_logger.warning(
f"Overwriting Braintrust `{trace_param_key}` from request header"
)
else: else:
verbose_logger.debug(f"Found Braintrust `{trace_param_key}` in request header") verbose_logger.debug(
f"Found Braintrust `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key) metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata return metadata
@ -157,24 +173,35 @@ class BraintrustLogger(CustomLogger):
output = None output = None
choices = [] choices = []
if response_obj is not None and ( if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse) kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
): ):
output = None output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json() output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"] choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse): elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text output = response_obj.choices[0].text
choices = response_obj.choices choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"] output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {}) litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata) metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {} clean_metadata = {}
try: try:
metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata metadata = copy.deepcopy(
metadata
) # Avoid modifying the original metadata
except Exception: except Exception:
new_metadata = {} new_metadata = {}
for key, value in metadata.items(): for key, value in metadata.items():
@ -192,7 +219,9 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id") project_id = metadata.get("project_id")
if project_id is None: if project_id is None:
project_name = metadata.get("project_name") project_name = metadata.get("project_name")
project_id = self.get_project_id_sync(project_name) if project_name else None project_id = (
self.get_project_id_sync(project_name) if project_name else None
)
if project_id is None: if project_id is None:
if self.default_project_id is None: if self.default_project_id is None:
@ -234,7 +263,8 @@ class BraintrustLogger(CustomLogger):
"completion_tokens": usage_obj.completion_tokens, "completion_tokens": usage_obj.completion_tokens,
"total_tokens": usage_obj.total_tokens, "total_tokens": usage_obj.total_tokens,
"total_cost": cost, "total_cost": cost,
"time_to_first_token": end_time.timestamp() - start_time.timestamp(), "time_to_first_token": end_time.timestamp()
- start_time.timestamp(),
"start": start_time.timestamp(), "start": start_time.timestamp(),
"end": end_time.timestamp(), "end": end_time.timestamp(),
} }
@ -255,7 +285,9 @@ class BraintrustLogger(CustomLogger):
request_data["metrics"] = metrics request_data["metrics"] = metrics
try: try:
print_verbose(f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}") print_verbose(
f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
)
global_braintrust_sync_http_handler.post( global_braintrust_sync_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert", url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]}, json={"events": [request_data]},
@ -276,20 +308,29 @@ class BraintrustLogger(CustomLogger):
output = None output = None
choices = [] choices = []
if response_obj is not None and ( if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse) kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
): ):
output = None output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json() output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"] choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse): elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text output = response_obj.choices[0].text
choices = response_obj.choices choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"] output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {}) litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata) metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {} clean_metadata = {}
new_metadata = {} new_metadata = {}
@ -313,7 +354,11 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id") project_id = metadata.get("project_id")
if project_id is None: if project_id is None:
project_name = metadata.get("project_name") project_name = metadata.get("project_name")
project_id = await self.get_project_id_async(project_name) if project_name else None project_id = (
await self.get_project_id_async(project_name)
if project_name
else None
)
if project_id is None: if project_id is None:
if self.default_project_id is None: if self.default_project_id is None:
@ -362,8 +407,14 @@ class BraintrustLogger(CustomLogger):
api_call_start_time = kwargs.get("api_call_start_time") api_call_start_time = kwargs.get("api_call_start_time")
completion_start_time = kwargs.get("completion_start_time") completion_start_time = kwargs.get("completion_start_time")
if api_call_start_time is not None and completion_start_time is not None: if (
metrics["time_to_first_token"] = completion_start_time.timestamp() - api_call_start_time.timestamp() api_call_start_time is not None
and completion_start_time is not None
):
metrics["time_to_first_token"] = (
completion_start_time.timestamp()
- api_call_start_time.timestamp()
)
request_data = { request_data = {
"id": litellm_call_id, "id": litellm_call_id,


@ -14,7 +14,6 @@ from litellm.integrations.custom_logger import CustomLogger
class CustomBatchLogger(CustomLogger): class CustomBatchLogger(CustomLogger):
def __init__( def __init__(
self, self,
flush_lock: Optional[asyncio.Lock] = None, flush_lock: Optional[asyncio.Lock] = None,


@ -7,7 +7,6 @@ from litellm.types.utils import StandardLoggingGuardrailInformation
class CustomGuardrail(CustomLogger): class CustomGuardrail(CustomLogger):
def __init__( def __init__(
self, self,
guardrail_name: Optional[str] = None, guardrail_name: Optional[str] = None,


@ -31,7 +31,7 @@ from litellm.types.utils import (
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -233,7 +233,6 @@ class DataDogLogger(
pass pass
async def _log_async_event(self, kwargs, response_obj, start_time, end_time): async def _log_async_event(self, kwargs, response_obj, start_time, end_time):
dd_payload = self.create_datadog_logging_payload( dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs, kwargs=kwargs,
response_obj=response_obj, response_obj=response_obj,


@ -125,9 +125,9 @@ class GCSBucketBase(CustomBatchLogger):
if kwargs is None: if kwargs is None:
kwargs = {} kwargs = {}
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params", None) StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params", None)
bucket_name: str bucket_name: str
path_service_account: Optional[str] path_service_account: Optional[str]


@ -70,13 +70,14 @@ class GcsPubSubLogger(CustomBatchLogger):
"""Construct authorization headers using Vertex AI auth""" """Construct authorization headers using Vertex AI auth"""
from litellm import vertex_chat_completion from litellm import vertex_chat_completion
_auth_header, vertex_project = ( (
await vertex_chat_completion._ensure_access_token_async( _auth_header,
vertex_project,
) = await vertex_chat_completion._ensure_access_token_async(
credentials=self.path_service_account_json, credentials=self.path_service_account_json,
project_id=None, project_id=None,
custom_llm_provider="vertex_ai", custom_llm_provider="vertex_ai",
) )
)
auth_header, _ = vertex_chat_completion._get_token_and_url( auth_header, _ = vertex_chat_completion._get_token_and_url(
model="pub-sub", model="pub-sub",


@ -155,11 +155,7 @@ class HumanloopLogger(CustomLogger):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
humanloop_api_key = dynamic_callback_params.get( humanloop_api_key = dynamic_callback_params.get(
"humanloop_api_key" "humanloop_api_key"
) or get_secret_str("HUMANLOOP_API_KEY") ) or get_secret_str("HUMANLOOP_API_KEY")


@ -471,9 +471,9 @@ class LangFuseLogger:
# we clean out all extra litellm metadata params before logging # we clean out all extra litellm metadata params before logging
clean_metadata: Dict[str, Any] = {} clean_metadata: Dict[str, Any] = {}
if prompt_management_metadata is not None: if prompt_management_metadata is not None:
clean_metadata["prompt_management_metadata"] = ( clean_metadata[
prompt_management_metadata "prompt_management_metadata"
) ] = prompt_management_metadata
if isinstance(metadata, dict): if isinstance(metadata, dict):
for key, value in metadata.items(): for key, value in metadata.items():
# generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy


@ -19,7 +19,6 @@ else:
class LangFuseHandler: class LangFuseHandler:
@staticmethod @staticmethod
def get_langfuse_logger_for_request( def get_langfuse_logger_for_request(
standard_callback_dynamic_params: StandardCallbackDynamicParams, standard_callback_dynamic_params: StandardCallbackDynamicParams,
@ -87,7 +86,9 @@ class LangFuseHandler:
if globalLangfuseLogger is not None: if globalLangfuseLogger is not None:
return globalLangfuseLogger return globalLangfuseLogger
credentials_dict: Dict[str, Any] = ( credentials_dict: Dict[
str, Any
] = (
{} {}
) # the global langfuse logger uses Environment Variables, there are no dynamic credentials ) # the global langfuse logger uses Environment Variables, there are no dynamic credentials
globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache( globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache(


@ -172,11 +172,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
return self.get_chat_completion_prompt( return self.get_chat_completion_prompt(
model, model,
messages, messages,


@ -75,7 +75,6 @@ class LangsmithLogger(CustomBatchLogger):
langsmith_project: Optional[str] = None, langsmith_project: Optional[str] = None,
langsmith_base_url: Optional[str] = None, langsmith_base_url: Optional[str] = None,
) -> LangsmithCredentialsObject: ) -> LangsmithCredentialsObject:
_credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY") _credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
if _credentials_api_key is None: if _credentials_api_key is None:
raise Exception( raise Exception(
@ -443,9 +442,9 @@ class LangsmithLogger(CustomBatchLogger):
Otherwise, use the default credentials. Otherwise, use the default credentials.
""" """
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params", None) StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params", None)
if standard_callback_dynamic_params is not None: if standard_callback_dynamic_params is not None:
credentials = self.get_credentials_from_env( credentials = self.get_credentials_from_env(
langsmith_api_key=standard_callback_dynamic_params.get( langsmith_api_key=standard_callback_dynamic_params.get(
@ -481,7 +480,6 @@ class LangsmithLogger(CustomBatchLogger):
asyncio.run(self.async_send_batch()) asyncio.run(self.async_send_batch())
def get_run_by_id(self, run_id): def get_run_by_id(self, run_id):
langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"] langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"]
langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"] langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"]


@ -1,12 +1,12 @@
import json import json
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any, Union
from litellm.proxy._types import SpanAttributes from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -20,7 +20,6 @@ def parse_tool_calls(tool_calls):
return None return None
def clean_tool_call(tool_call): def clean_tool_call(tool_call):
serialized = { serialized = {
"type": tool_call.type, "type": tool_call.type,
"id": tool_call.id, "id": tool_call.id,
@ -36,7 +35,6 @@ def parse_tool_calls(tool_calls):
def parse_messages(input): def parse_messages(input):
if input is None: if input is None:
return None return None


@ -48,14 +48,17 @@ class MlflowLogger(CustomLogger):
def _extract_and_set_chat_attributes(self, span, kwargs, response_obj): def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
try: try:
from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools from mlflow.tracing.utils import set_span_chat_messages # type: ignore
from mlflow.tracing.utils import set_span_chat_tools # type: ignore
except ImportError: except ImportError:
return return
inputs = self._construct_input(kwargs) inputs = self._construct_input(kwargs)
input_messages = inputs.get("messages", []) input_messages = inputs.get("messages", [])
output_messages = [c.message.model_dump(exclude_none=True) output_messages = [
for c in getattr(response_obj, "choices", [])] c.message.model_dump(exclude_none=True)
for c in getattr(response_obj, "choices", [])
]
if messages := [*input_messages, *output_messages]: if messages := [*input_messages, *output_messages]:
set_span_chat_messages(span, messages) set_span_chat_messages(span, messages)
if tools := inputs.get("tools"): if tools := inputs.get("tools"):


@ -1,7 +1,7 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -23,10 +23,10 @@ if TYPE_CHECKING:
) )
from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
Span = _Span Span = Union[_Span, Any]
SpanExporter = _SpanExporter SpanExporter = Union[_SpanExporter, Any]
UserAPIKeyAuth = _UserAPIKeyAuth UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any]
ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any]
else: else:
Span = Any Span = Any
SpanExporter = Any SpanExporter = Any
@ -46,7 +46,6 @@ LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@dataclass @dataclass
class OpenTelemetryConfig: class OpenTelemetryConfig:
exporter: Union[str, SpanExporter] = "console" exporter: Union[str, SpanExporter] = "console"
endpoint: Optional[str] = None endpoint: Optional[str] = None
headers: Optional[str] = None headers: Optional[str] = None
@ -154,7 +153,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -215,7 +213,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None, end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -353,9 +350,9 @@ class OpenTelemetry(CustomLogger):
""" """
from opentelemetry import trace from opentelemetry import trace
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params") StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params")
if not standard_callback_dynamic_params: if not standard_callback_dynamic_params:
return return
@ -722,7 +719,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value) span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj): def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
kwargs.get("optional_params", {}) kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {} litellm_params = kwargs.get("litellm_params", {}) or {}
custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown") custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
@ -843,12 +839,14 @@ class OpenTelemetry(CustomLogger):
headers=dynamic_headers or self.OTEL_HEADERS headers=dynamic_headers or self.OTEL_HEADERS
) )
if isinstance(self.OTEL_EXPORTER, SpanExporter): if hasattr(
self.OTEL_EXPORTER, "export"
): # Check if it has the export method that SpanExporter requires
verbose_logger.debug( verbose_logger.debug(
"OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s", "OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s",
self.OTEL_EXPORTER, self.OTEL_EXPORTER,
) )
return SimpleSpanProcessor(self.OTEL_EXPORTER) return SimpleSpanProcessor(cast(SpanExporter, self.OTEL_EXPORTER))
if self.OTEL_EXPORTER == "console": if self.OTEL_EXPORTER == "console":
verbose_logger.debug( verbose_logger.debug(
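(Editorial note, not part of the diff.) The hunk above replaces `isinstance(self.OTEL_EXPORTER, SpanExporter)` with a duck-typing check on an `export` method, so exporter objects that do not subclass the particular `SpanExporter` class loaded at runtime are still accepted. A small sketch of that idea with hypothetical names; no OpenTelemetry import is needed:

    from typing import Any


    class ConsoleishExporter:
        """Stand-in exporter that quacks like a SpanExporter but subclasses nothing."""

        def export(self, spans: list) -> None:
            for span in spans:
                print("exporting:", span)


    def build_processor(exporter: Any):
        # Check for the capability we actually need instead of the concrete base class;
        # this avoids false negatives when two copies/versions of the base class exist.
        if hasattr(exporter, "export"):
            return ("simple_processor", exporter)  # stands in for SimpleSpanProcessor(exporter)
        raise TypeError(f"{exporter!r} does not look like a span exporter")


    processor = build_processor(ConsoleishExporter())
    print(processor[0])  # simple_processor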
@ -907,7 +905,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -961,7 +958,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode

View file

@ -185,7 +185,6 @@ class OpikLogger(CustomBatchLogger):
def _create_opik_payload( # noqa: PLR0915 def _create_opik_payload( # noqa: PLR0915
self, kwargs, response_obj, start_time, end_time self, kwargs, response_obj, start_time, end_time
) -> List[Dict]: ) -> List[Dict]:
# Get metadata # Get metadata
_litellm_params = kwargs.get("litellm_params", {}) or {} _litellm_params = kwargs.get("litellm_params", {}) or {}
litellm_params_metadata = _litellm_params.get("metadata", {}) or {} litellm_params_metadata = _litellm_params.get("metadata", {}) or {}

View file

@ -988,9 +988,9 @@ class PrometheusLogger(CustomLogger):
): ):
try: try:
verbose_logger.debug("setting remaining tokens requests metric") verbose_logger.debug("setting remaining tokens requests metric")
standard_logging_payload: Optional[StandardLoggingPayload] = ( standard_logging_payload: Optional[
request_kwargs.get("standard_logging_object") StandardLoggingPayload
) ] = request_kwargs.get("standard_logging_object")
if standard_logging_payload is None: if standard_logging_payload is None:
return return

View file

@ -14,7 +14,6 @@ class PromptManagementClient(TypedDict):
class PromptManagementBase(ABC): class PromptManagementBase(ABC):
@property @property
@abstractmethod @abstractmethod
def integration_name(self) -> str: def integration_name(self) -> str:
@ -83,11 +82,7 @@ class PromptManagementBase(ABC):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
if not self.should_run_prompt_management( if not self.should_run_prompt_management(
prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
): ):

View file

@ -38,7 +38,7 @@ class S3Logger:
if litellm.s3_callback_params is not None: if litellm.s3_callback_params is not None:
# read in .env variables - example os.environ/AWS_BUCKET_NAME # read in .env variables - example os.environ/AWS_BUCKET_NAME
for key, value in litellm.s3_callback_params.items(): for key, value in litellm.s3_callback_params.items():
if type(value) is str and value.startswith("os.environ/"): if isinstance(value, str) and value.startswith("os.environ/"):
litellm.s3_callback_params[key] = litellm.get_secret(value) litellm.s3_callback_params[key] = litellm.get_secret(value)
# now set s3 params from litellm.s3_logger_params # now set s3 params from litellm.s3_logger_params
s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name") s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")
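(Editorial note, not part of the diff.) The `isinstance(value, str)` fix above tightens the check that resolves `os.environ/...` placeholders in `litellm.s3_callback_params` (per the `# read in .env variables` comment). A minimal sketch of that resolution step; `resolve_secret` is a hypothetical stand-in for `litellm.get_secret`, not the litellm implementation:

    import os


    def resolve_secret(value: str) -> str:
        # Resolve "os.environ/NAME" to os.environ["NAME"]; hypothetical helper.
        env_key = value.split("os.environ/", 1)[1]
        return os.environ.get(env_key, "")


    s3_callback_params = {
        "s3_bucket_name": "os.environ/AWS_BUCKET_NAME",  # placeholder resolved from the environment
        "s3_region_name": "us-east-1",                   # literal value, left untouched
        "s3_retries": 3,                                  # non-string value, skipped by the isinstance check
    }

    os.environ.setdefault("AWS_BUCKET_NAME", "my-logs-bucket")

    for key, value in s3_callback_params.items():
        if isinstance(value, str) and value.startswith("os.environ/"):
            s3_callback_params[key] = resolve_secret(value)

    print(s3_callback_params["s3_bucket_name"])  # my-logs-bucket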

View file

@ -21,11 +21,11 @@ try:
# contains a (known) object attribute # contains a (known) object attribute
object: Literal["chat.completion", "edit", "text_completion"] object: Literal["chat.completion", "edit", "text_completion"]
def __getitem__(self, key: K) -> V: ... # noqa def __getitem__(self, key: K) -> V:
... # noqa
def get( # noqa def get(self, key: K, default: Optional[V] = None) -> Optional[V]: # noqa
self, key: K, default: Optional[V] = None ... # pragma: no cover
) -> Optional[V]: ... # pragma: no cover
class OpenAIRequestResponseResolver: class OpenAIRequestResponseResolver:
def __call__( def __call__(

View file

@ -10,7 +10,7 @@ from litellm.types.llms.openai import AllMessageValues
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any

View file

@ -11,7 +11,9 @@ except (ImportError, AttributeError):
# Old way to access resources, which setuptools deprecated some time ago # Old way to access resources, which setuptools deprecated some time ago
import pkg_resources # type: ignore import pkg_resources # type: ignore
filename = pkg_resources.resource_filename(__name__, "litellm_core_utils/tokenizers") filename = pkg_resources.resource_filename(
__name__, "litellm_core_utils/tokenizers"
)
os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv( os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
"CUSTOM_TIKTOKEN_CACHE_DIR", filename "CUSTOM_TIKTOKEN_CACHE_DIR", filename

View file

@ -79,6 +79,22 @@ def get_supported_openai_params( # noqa: PLR0915
elif custom_llm_provider == "maritalk": elif custom_llm_provider == "maritalk":
return litellm.MaritalkConfig().get_supported_openai_params(model=model) return litellm.MaritalkConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "openai": elif custom_llm_provider == "openai":
if request_type == "transcription":
transcription_provider_config = (
litellm.ProviderConfigManager.get_provider_audio_transcription_config(
model=model, provider=LlmProviders.OPENAI
)
)
if isinstance(
transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
):
return transcription_provider_config.get_supported_openai_params(
model=model
)
else:
raise ValueError(
f"Unsupported provider config: {transcription_provider_config} for model: {model}"
)
return litellm.OpenAIConfig().get_supported_openai_params(model=model) return litellm.OpenAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "azure": elif custom_llm_provider == "azure":
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
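(Editorial note, not part of the diff.) The new `request_type == "transcription"` branch above routes OpenAI transcription models through a provider-specific audio-transcription config instead of the generic `OpenAIConfig`. A hedged usage sketch, assuming `litellm.get_supported_openai_params` keeps the `model` / `custom_llm_provider` / `request_type` arguments shown in this diff; the model names are illustrative, and per the diff a model whose transcription config is not the GPT audio config raises `ValueError`:

    import litellm

    chat_params = litellm.get_supported_openai_params(
        model="gpt-4o-mini",
        custom_llm_provider="openai",
    )
    print("chat:", chat_params)

    transcription_params = litellm.get_supported_openai_params(
        model="gpt-4o-transcribe",          # illustrative transcription model name
        custom_llm_provider="openai",
        request_type="transcription",       # new branch: resolved via the audio-transcription config
    )
    print("transcription:", transcription_params)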

View file

@ -67,6 +67,7 @@ from litellm.types.utils import (
StandardCallbackDynamicParams, StandardCallbackDynamicParams,
StandardLoggingAdditionalHeaders, StandardLoggingAdditionalHeaders,
StandardLoggingHiddenParams, StandardLoggingHiddenParams,
StandardLoggingMCPToolCall,
StandardLoggingMetadata, StandardLoggingMetadata,
StandardLoggingModelCostFailureDebugInformation, StandardLoggingModelCostFailureDebugInformation,
StandardLoggingModelInformation, StandardLoggingModelInformation,
@ -239,9 +240,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.litellm_trace_id = litellm_trace_id self.litellm_trace_id = litellm_trace_id
self.function_id = function_id self.function_id = function_id
self.streaming_chunks: List[Any] = [] # for generating complete stream response self.streaming_chunks: List[Any] = [] # for generating complete stream response
self.sync_streaming_chunks: List[Any] = ( self.sync_streaming_chunks: List[
[] Any
) # for generating complete stream response ] = [] # for generating complete stream response
self.log_raw_request_response = log_raw_request_response self.log_raw_request_response = log_raw_request_response
# Initialize dynamic callbacks # Initialize dynamic callbacks
@ -452,11 +453,13 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
) -> Tuple[str, List[AllMessageValues], dict]: ) -> Tuple[str, List[AllMessageValues], dict]:
custom_logger = self.get_custom_logger_for_prompt_management(model) custom_logger = self.get_custom_logger_for_prompt_management(model)
if custom_logger: if custom_logger:
model, messages, non_default_params = ( (
custom_logger.get_chat_completion_prompt( model,
messages,
non_default_params,
) = custom_logger.get_chat_completion_prompt(
model=model, model=model,
messages=messages, messages=messages,
non_default_params=non_default_params, non_default_params=non_default_params,
@ -464,7 +467,6 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_variables=prompt_variables, prompt_variables=prompt_variables,
dynamic_callback_params=self.standard_callback_dynamic_params, dynamic_callback_params=self.standard_callback_dynamic_params,
) )
)
self.messages = messages self.messages = messages
return model, messages, non_default_params return model, messages, non_default_params
@ -541,12 +543,11 @@ class Logging(LiteLLMLoggingBaseClass):
model model
): # if model name was changes pre-call, overwrite the initial model call name with the new one ): # if model name was changes pre-call, overwrite the initial model call name with the new one
self.model_call_details["model"] = model self.model_call_details["model"] = model
self.model_call_details["litellm_params"]["api_base"] = ( self.model_call_details["litellm_params"][
self._get_masked_api_base(additional_args.get("api_base", "")) "api_base"
) ] = self._get_masked_api_base(additional_args.get("api_base", ""))
def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915 def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
# Log the exact input to the LLM API # Log the exact input to the LLM API
litellm.error_logs["PRE_CALL"] = locals() litellm.error_logs["PRE_CALL"] = locals()
try: try:
@ -568,19 +569,16 @@ class Logging(LiteLLMLoggingBaseClass):
self.log_raw_request_response is True self.log_raw_request_response is True
or log_raw_request_response is True or log_raw_request_response is True
): ):
_litellm_params = self.model_call_details.get("litellm_params", {}) _litellm_params = self.model_call_details.get("litellm_params", {})
_metadata = _litellm_params.get("metadata", {}) or {} _metadata = _litellm_params.get("metadata", {}) or {}
try: try:
# [Non-blocking Extra Debug Information in metadata] # [Non-blocking Extra Debug Information in metadata]
if turn_off_message_logging is True: if turn_off_message_logging is True:
_metadata[
_metadata["raw_request"] = ( "raw_request"
"redacted by litellm. \ ] = "redacted by litellm. \
'litellm.turn_off_message_logging=True'" 'litellm.turn_off_message_logging=True'"
)
else: else:
curl_command = self._get_request_curl_command( curl_command = self._get_request_curl_command(
api_base=additional_args.get("api_base", ""), api_base=additional_args.get("api_base", ""),
headers=additional_args.get("headers", {}), headers=additional_args.get("headers", {}),
@ -590,8 +588,9 @@ class Logging(LiteLLMLoggingBaseClass):
_metadata["raw_request"] = str(curl_command) _metadata["raw_request"] = str(curl_command)
# split up, so it's easier to parse in the UI # split up, so it's easier to parse in the UI
self.model_call_details["raw_request_typed_dict"] = ( self.model_call_details[
RawRequestTypedDict( "raw_request_typed_dict"
] = RawRequestTypedDict(
raw_request_api_base=str( raw_request_api_base=str(
additional_args.get("api_base") or "" additional_args.get("api_base") or ""
), ),
@ -604,20 +603,19 @@ class Logging(LiteLLMLoggingBaseClass):
), ),
error=None, error=None,
) )
)
except Exception as e: except Exception as e:
self.model_call_details["raw_request_typed_dict"] = ( self.model_call_details[
RawRequestTypedDict( "raw_request_typed_dict"
] = RawRequestTypedDict(
error=str(e), error=str(e),
) )
)
traceback.print_exc() traceback.print_exc()
_metadata["raw_request"] = ( _metadata[
"Unable to Log \ "raw_request"
] = "Unable to Log \
raw request: {}".format( raw request: {}".format(
str(e) str(e)
) )
)
if self.logger_fn and callable(self.logger_fn): if self.logger_fn and callable(self.logger_fn):
try: try:
self.logger_fn( self.logger_fn(
@ -941,9 +939,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}" f"response_cost_failure_debug_information: {debug_info}"
) )
self.model_call_details["response_cost_failure_debug_information"] = ( self.model_call_details[
debug_info "response_cost_failure_debug_information"
) ] = debug_info
return None return None
try: try:
@ -968,9 +966,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}" f"response_cost_failure_debug_information: {debug_info}"
) )
self.model_call_details["response_cost_failure_debug_information"] = ( self.model_call_details[
debug_info "response_cost_failure_debug_information"
) ] = debug_info
return None return None
@ -995,7 +993,6 @@ class Logging(LiteLLMLoggingBaseClass):
def should_run_callback( def should_run_callback(
self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
) -> bool: ) -> bool:
if litellm.global_disable_no_log_param: if litellm.global_disable_no_log_param:
return True return True
@ -1027,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
if self.completion_start_time is None: if self.completion_start_time is None:
self.completion_start_time = end_time self.completion_start_time = end_time
self.model_call_details["completion_start_time"] = ( self.model_call_details[
self.completion_start_time "completion_start_time"
) ] = self.completion_start_time
self.model_call_details["log_event_type"] = "successful_api_call" self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit self.model_call_details["cache_hit"] = cache_hit
@ -1083,13 +1080,14 @@ class Logging(LiteLLMLoggingBaseClass):
"response_cost" "response_cost"
] ]
else: else:
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator(result=result) "response_cost"
) ] = self._response_cost_calculator(result=result)
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=result, init_response_obj=result,
start_time=start_time, start_time=start_time,
@ -1098,11 +1096,11 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
) elif isinstance(result, dict) or isinstance(result, list):
elif isinstance(result, dict): # pass-through endpoints
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=result, init_response_obj=result,
start_time=start_time, start_time=start_time,
@ -1111,11 +1109,10 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
elif standard_logging_object is not None: elif standard_logging_object is not None:
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
standard_logging_object "standard_logging_object"
) ] = standard_logging_object
else: # streaming chunks + image gen. else: # streaming chunks + image gen.
self.model_call_details["response_cost"] = None self.model_call_details["response_cost"] = None
@ -1154,7 +1151,6 @@ class Logging(LiteLLMLoggingBaseClass):
standard_logging_object=kwargs.get("standard_logging_object", None), standard_logging_object=kwargs.get("standard_logging_object", None),
) )
try: try:
## BUILD COMPLETE STREAMED RESPONSE ## BUILD COMPLETE STREAMED RESPONSE
complete_streaming_response: Optional[ complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse] Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
@ -1172,15 +1168,16 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
"Logging Details LiteLLM-Success Call streaming complete" "Logging Details LiteLLM-Success Call streaming complete"
) )
self.model_call_details["complete_streaming_response"] = ( self.model_call_details[
complete_streaming_response "complete_streaming_response"
) ] = complete_streaming_response
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator(result=complete_streaming_response) "response_cost"
) ] = self._response_cost_calculator(result=complete_streaming_response)
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=complete_streaming_response, init_response_obj=complete_streaming_response,
start_time=start_time, start_time=start_time,
@ -1189,7 +1186,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
callbacks = self.get_combined_callback_list( callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks, dynamic_success_callbacks=self.dynamic_success_callbacks,
global_callbacks=litellm.success_callback, global_callbacks=litellm.success_callback,
@ -1207,7 +1203,6 @@ class Logging(LiteLLMLoggingBaseClass):
## LOGGING HOOK ## ## LOGGING HOOK ##
for callback in callbacks: for callback in callbacks:
if isinstance(callback, CustomLogger): if isinstance(callback, CustomLogger):
self.model_call_details, result = callback.logging_hook( self.model_call_details, result = callback.logging_hook(
kwargs=self.model_call_details, kwargs=self.model_call_details,
result=result, result=result,
@ -1538,11 +1533,11 @@ class Logging(LiteLLMLoggingBaseClass):
) )
else: else:
if self.stream and complete_streaming_response: if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = ( self.model_call_details[
self.model_call_details.get( "complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {} "complete_streaming_response", {}
) )
)
result = self.model_call_details["complete_response"] result = self.model_call_details["complete_response"]
openMeterLogger.log_success_event( openMeterLogger.log_success_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
@ -1581,11 +1576,11 @@ class Logging(LiteLLMLoggingBaseClass):
) )
else: else:
if self.stream and complete_streaming_response: if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = ( self.model_call_details[
self.model_call_details.get( "complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {} "complete_streaming_response", {}
) )
)
result = self.model_call_details["complete_response"] result = self.model_call_details["complete_response"]
callback.log_success_event( callback.log_success_event(
@ -1659,7 +1654,6 @@ class Logging(LiteLLMLoggingBaseClass):
if self.call_type == CallTypes.aretrieve_batch.value and isinstance( if self.call_type == CallTypes.aretrieve_batch.value and isinstance(
result, LiteLLMBatch result, LiteLLMBatch
): ):
response_cost, batch_usage, batch_models = await _handle_completed_batch( response_cost, batch_usage, batch_models = await _handle_completed_batch(
batch=result, custom_llm_provider=self.custom_llm_provider batch=result, custom_llm_provider=self.custom_llm_provider
) )
@ -1692,9 +1686,9 @@ class Logging(LiteLLMLoggingBaseClass):
if complete_streaming_response is not None: if complete_streaming_response is not None:
print_verbose("Async success callbacks: Got a complete streaming response") print_verbose("Async success callbacks: Got a complete streaming response")
self.model_call_details["async_complete_streaming_response"] = ( self.model_call_details[
complete_streaming_response "async_complete_streaming_response"
) ] = complete_streaming_response
try: try:
if self.model_call_details.get("cache_hit", False) is True: if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0 self.model_call_details["response_cost"] = 0.0
@ -1704,11 +1698,11 @@ class Logging(LiteLLMLoggingBaseClass):
model_call_details=self.model_call_details model_call_details=self.model_call_details
) )
# base_model defaults to None if not set on model_info # base_model defaults to None if not set on model_info
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator( "response_cost"
] = self._response_cost_calculator(
result=complete_streaming_response result=complete_streaming_response
) )
)
verbose_logger.debug( verbose_logger.debug(
f"Model={self.model}; cost={self.model_call_details['response_cost']}" f"Model={self.model}; cost={self.model_call_details['response_cost']}"
@ -1720,8 +1714,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["response_cost"] = None self.model_call_details["response_cost"] = None
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=complete_streaming_response, init_response_obj=complete_streaming_response,
start_time=start_time, start_time=start_time,
@ -1730,7 +1725,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
callbacks = self.get_combined_callback_list( callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks, dynamic_success_callbacks=self.dynamic_async_success_callbacks,
global_callbacks=litellm._async_success_callback, global_callbacks=litellm._async_success_callback,
@ -1935,8 +1929,9 @@ class Logging(LiteLLMLoggingBaseClass):
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj={}, init_response_obj={},
start_time=start_time, start_time=start_time,
@ -1947,7 +1942,6 @@ class Logging(LiteLLMLoggingBaseClass):
original_exception=exception, original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
return start_time, end_time return start_time, end_time
async def special_failure_handlers(self, exception: Exception): async def special_failure_handlers(self, exception: Exception):
@ -2084,7 +2078,6 @@ class Logging(LiteLLMLoggingBaseClass):
) )
is not True is not True
): # custom logger class ): # custom logger class
callback.log_failure_event( callback.log_failure_event(
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -2713,9 +2706,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
endpoint=arize_config.endpoint, endpoint=arize_config.endpoint,
) )
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
f"space_key={arize_config.space_key},api_key={arize_config.api_key}" "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
isinstance(callback, ArizeLogger) isinstance(callback, ArizeLogger)
@ -2739,9 +2732,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
# auth can be disabled on local deployments of arize phoenix # auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None: if arize_phoenix_config.otlp_auth_headers is not None:
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
arize_phoenix_config.otlp_auth_headers "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = arize_phoenix_config.otlp_auth_headers
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
@ -2832,9 +2825,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
exporter="otlp_http", exporter="otlp_http",
endpoint="https://langtrace.ai/api/trace", endpoint="https://langtrace.ai/api/trace",
) )
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
f"api_key={os.getenv('LANGTRACE_API_KEY')}" "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
isinstance(callback, OpenTelemetry) isinstance(callback, OpenTelemetry)
@ -3114,6 +3107,7 @@ class StandardLoggingPayloadSetup:
litellm_params: Optional[dict] = None, litellm_params: Optional[dict] = None,
prompt_integration: Optional[str] = None, prompt_integration: Optional[str] = None,
applied_guardrails: Optional[List[str]] = None, applied_guardrails: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
) -> StandardLoggingMetadata: ) -> StandardLoggingMetadata:
""" """
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata. Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@ -3160,6 +3154,7 @@ class StandardLoggingPayloadSetup:
user_api_key_end_user_id=None, user_api_key_end_user_id=None,
prompt_management_metadata=prompt_management_metadata, prompt_management_metadata=prompt_management_metadata,
applied_guardrails=applied_guardrails, applied_guardrails=applied_guardrails,
mcp_tool_call_metadata=mcp_tool_call_metadata,
) )
if isinstance(metadata, dict): if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys # Filter the metadata dictionary to include only the specified keys
@ -3223,7 +3218,6 @@ class StandardLoggingPayloadSetup:
custom_llm_provider: Optional[str], custom_llm_provider: Optional[str],
init_response_obj: Union[Any, BaseModel, dict], init_response_obj: Union[Any, BaseModel, dict],
) -> StandardLoggingModelInformation: ) -> StandardLoggingModelInformation:
model_cost_name = _select_model_name_for_cost_calc( model_cost_name = _select_model_name_for_cost_calc(
model=None, model=None,
completion_response=init_response_obj, # type: ignore completion_response=init_response_obj, # type: ignore
@ -3286,7 +3280,6 @@ class StandardLoggingPayloadSetup:
def get_additional_headers( def get_additional_headers(
additiona_headers: Optional[dict], additiona_headers: Optional[dict],
) -> Optional[StandardLoggingAdditionalHeaders]: ) -> Optional[StandardLoggingAdditionalHeaders]:
if additiona_headers is None: if additiona_headers is None:
return None return None
@ -3322,11 +3315,11 @@ class StandardLoggingPayloadSetup:
for key in StandardLoggingHiddenParams.__annotations__.keys(): for key in StandardLoggingHiddenParams.__annotations__.keys():
if key in hidden_params: if key in hidden_params:
if key == "additional_headers": if key == "additional_headers":
clean_hidden_params["additional_headers"] = ( clean_hidden_params[
StandardLoggingPayloadSetup.get_additional_headers( "additional_headers"
] = StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key] hidden_params[key]
) )
)
else: else:
clean_hidden_params[key] = hidden_params[key] # type: ignore clean_hidden_params[key] = hidden_params[key] # type: ignore
return clean_hidden_params return clean_hidden_params
@ -3463,13 +3456,15 @@ def get_standard_logging_object_payload(
) )
# cleanup timestamps # cleanup timestamps
start_time_float, end_time_float, completion_start_time_float = ( (
StandardLoggingPayloadSetup.cleanup_timestamps( start_time_float,
end_time_float,
completion_start_time_float,
) = StandardLoggingPayloadSetup.cleanup_timestamps(
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
completion_start_time=completion_start_time, completion_start_time=completion_start_time,
) )
)
response_time = StandardLoggingPayloadSetup.get_response_time( response_time = StandardLoggingPayloadSetup.get_response_time(
start_time_float=start_time_float, start_time_float=start_time_float,
end_time_float=end_time_float, end_time_float=end_time_float,
@ -3486,6 +3481,7 @@ def get_standard_logging_object_payload(
litellm_params=litellm_params, litellm_params=litellm_params,
prompt_integration=kwargs.get("prompt_integration", None), prompt_integration=kwargs.get("prompt_integration", None),
applied_guardrails=kwargs.get("applied_guardrails", None), applied_guardrails=kwargs.get("applied_guardrails", None),
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
) )
_request_body = proxy_server_request.get("body", {}) _request_body = proxy_server_request.get("body", {})
@ -3495,7 +3491,6 @@ def get_standard_logging_object_payload(
saved_cache_cost: float = 0.0 saved_cache_cost: float = 0.0
if cache_hit is True: if cache_hit is True:
id = f"{id}_cache_hit{time.time()}" # do not duplicate the request id id = f"{id}_cache_hit{time.time()}" # do not duplicate the request id
saved_cache_cost = ( saved_cache_cost = (
logging_obj._response_cost_calculator( logging_obj._response_cost_calculator(
@ -3626,6 +3621,7 @@ def get_standard_logging_metadata(
user_api_key_end_user_id=None, user_api_key_end_user_id=None,
prompt_management_metadata=None, prompt_management_metadata=None,
applied_guardrails=None, applied_guardrails=None,
mcp_tool_call_metadata=None,
) )
if isinstance(metadata, dict): if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys # Filter the metadata dictionary to include only the specified keys
@ -3658,9 +3654,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
): ):
for k, v in metadata["user_api_key_metadata"].items(): for k, v in metadata["user_api_key_metadata"].items():
if k == "logging": # prevent logging user logging keys if k == "logging": # prevent logging user logging keys
cleaned_user_api_key_metadata[k] = ( cleaned_user_api_key_metadata[
"scrubbed_by_litellm_for_sensitive_keys" k
) ] = "scrubbed_by_litellm_for_sensitive_keys"
else: else:
cleaned_user_api_key_metadata[k] = v cleaned_user_api_key_metadata[k] = v

View file

@ -1,7 +1,7 @@
# What is this? # What is this?
## Helper utilities for cost_per_token() ## Helper utilities for cost_per_token()
from typing import Optional, Tuple from typing import Optional, Tuple, cast
import litellm import litellm
from litellm import verbose_logger from litellm import verbose_logger
@ -121,6 +121,31 @@ def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> floa
return model_info["output_cost_per_token"] return model_info["output_cost_per_token"]
def calculate_cost_component(
model_info: ModelInfo, cost_key: str, usage_value: Optional[float]
) -> float:
"""
Generic cost calculator for any usage component
Args:
model_info: Dictionary containing cost information
cost_key: The key for the cost multiplier in model_info (e.g., 'input_cost_per_audio_token')
usage_value: The actual usage value (e.g., number of tokens, characters, seconds)
Returns:
float: The calculated cost
"""
cost_per_unit = model_info.get(cost_key)
if (
cost_per_unit is not None
and isinstance(cost_per_unit, float)
and usage_value is not None
and usage_value > 0
):
return float(usage_value) * cost_per_unit
return 0.0
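(Editorial note, not part of the diff.) The new `calculate_cost_component` helper above turns each optional per-unit price in `model_info` into a cost and returns 0.0 whenever the price or the usage value is missing. A small usage sketch with a standalone copy of the same logic and made-up prices, not taken from any real model_info entry:

    from typing import Optional


    def calculate_cost_component(
        model_info: dict, cost_key: str, usage_value: Optional[float]
    ) -> float:
        # Same logic as the helper in the diff, copied here so the example runs standalone.
        cost_per_unit = model_info.get(cost_key)
        if (
            cost_per_unit is not None
            and isinstance(cost_per_unit, float)
            and usage_value is not None
            and usage_value > 0
        ):
            return float(usage_value) * cost_per_unit
        return 0.0


    fake_model_info = {  # hypothetical prices, chosen only for the arithmetic
        "input_cost_per_audio_token": 0.000002,
        "cache_read_input_token_cost": 0.0000005,
    }

    print(calculate_cost_component(fake_model_info, "input_cost_per_audio_token", 1000))  # ~0.002
    print(calculate_cost_component(fake_model_info, "cache_read_input_token_cost", 0))    # 0.0 (no usage)
    print(calculate_cost_component(fake_model_info, "input_cost_per_character", 500))     # 0.0 (price missing)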
def generic_cost_per_token( def generic_cost_per_token(
model: str, usage: Usage, custom_llm_provider: str model: str, usage: Usage, custom_llm_provider: str
) -> Tuple[float, float]: ) -> Tuple[float, float]:
@ -136,6 +161,7 @@ def generic_cost_per_token(
Returns: Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
""" """
## GET MODEL INFO ## GET MODEL INFO
model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider) model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider)
@ -143,38 +169,124 @@ def generic_cost_per_token(
### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing) ### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
prompt_cost = 0.0 prompt_cost = 0.0
### PROCESSING COST ### PROCESSING COST
non_cache_hit_tokens = usage.prompt_tokens text_tokens = usage.prompt_tokens
cache_hit_tokens = 0 cache_hit_tokens = 0
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens: audio_tokens = 0
cache_hit_tokens = usage.prompt_tokens_details.cached_tokens character_count = 0
non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens image_count = 0
video_length_seconds = 0
if usage.prompt_tokens_details:
cache_hit_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
)
or 0
)
text_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
)
or 0 # default to prompt tokens, if this field is not set
)
audio_tokens = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
or 0
)
character_count = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "character_count", 0),
)
or 0
)
image_count = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "image_count", 0))
or 0
)
video_length_seconds = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "video_length_seconds", 0),
)
or 0
)
## EDGE CASE - text tokens not set inside PromptTokensDetails
if text_tokens == 0:
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage) prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost prompt_cost = float(text_tokens) * prompt_base_cost
_cache_read_input_token_cost = model_info.get("cache_read_input_token_cost") ### CACHE READ COST
if ( prompt_cost += calculate_cost_component(
_cache_read_input_token_cost is not None model_info, "cache_read_input_token_cost", cache_hit_tokens
and usage.prompt_tokens_details )
and usage.prompt_tokens_details.cached_tokens
): ### AUDIO COST
prompt_cost += ( prompt_cost += calculate_cost_component(
float(usage.prompt_tokens_details.cached_tokens) model_info, "input_cost_per_audio_token", audio_tokens
* _cache_read_input_token_cost
) )
### CACHE WRITING COST ### CACHE WRITING COST
_cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost") prompt_cost += calculate_cost_component(
if _cache_creation_input_token_cost is not None: model_info,
prompt_cost += ( "cache_creation_input_token_cost",
float(usage._cache_creation_input_tokens) * _cache_creation_input_token_cost usage._cache_creation_input_tokens,
)
### CHARACTER COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_character", character_count
)
### IMAGE COUNT COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_image", image_count
)
### VIDEO LENGTH COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_video_per_second", video_length_seconds
) )
## CALCULATE OUTPUT COST ## CALCULATE OUTPUT COST
completion_base_cost = _get_completion_token_base_cost( completion_base_cost = _get_completion_token_base_cost(
model_info=model_info, usage=usage model_info=model_info, usage=usage
) )
completion_cost = usage["completion_tokens"] * completion_base_cost text_tokens = usage.completion_tokens
audio_tokens = 0
if usage.completion_tokens_details is not None:
audio_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "audio_tokens", 0),
)
or 0
)
text_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "text_tokens", None),
)
or usage.completion_tokens # default to completion tokens, if this field is not set
)
## TEXT COST
completion_cost = float(text_tokens) * completion_base_cost
_output_cost_per_audio_token: Optional[float] = model_info.get(
"output_cost_per_audio_token"
)
## AUDIO COST
if (
_output_cost_per_audio_token is not None
and audio_tokens is not None
and audio_tokens > 0
):
completion_cost += float(audio_tokens) * _output_cost_per_audio_token
return prompt_cost, completion_cost return prompt_cost, completion_cost
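(Editorial note, not part of the diff.) Taken together, the rewritten `generic_cost_per_token` above now prices each component of `prompt_tokens_details` separately (text, cached, audio, characters, images, video seconds) and adds audio pricing on the completion side, with text tokens derived from the total when the provider does not report them. A worked example following the same composition; every price and count below is hypothetical:

    # Hypothetical per-token prices; the composition mirrors the diff, not real model pricing.
    input_cost_per_token = 0.000001          # text prompt tokens
    cache_read_input_token_cost = 0.0000001  # cached prompt tokens
    input_cost_per_audio_token = 0.000004    # audio prompt tokens
    output_cost_per_token = 0.000002         # text completion tokens
    output_cost_per_audio_token = 0.000008   # audio completion tokens

    prompt_tokens = 1200        # total prompt tokens reported by the provider
    cached_tokens = 200
    prompt_audio_tokens = 100
    # Edge case from the diff: if text_tokens is not reported, derive it from the total.
    prompt_text_tokens = prompt_tokens - cached_tokens - prompt_audio_tokens  # 900

    completion_text_tokens = 300
    completion_audio_tokens = 50

    prompt_cost = (
        prompt_text_tokens * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
        + prompt_audio_tokens * input_cost_per_audio_token
    )
    completion_cost = (
        completion_text_tokens * output_cost_per_token
        + completion_audio_tokens * output_cost_per_audio_token
    )

    print(round(prompt_cost, 8))      # 0.00132 = 900*1e-6 + 200*1e-7 + 100*4e-6
    print(round(completion_cost, 8))  # 0.001   = 300*2e-6 + 50*8e-6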

Some files were not shown because too many files have changed in this diff.