Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00

Commit 83ba96b8c6: Merge branch 'main' into litellm_sagemaker_fix_stream

452 changed files with 13,927 additions and 3,613 deletions
.circleci/config.yml

@@ -3,6 +3,18 @@ orbs:
   codecov: codecov/codecov@4.0.1
   node: circleci/node@5.1.0 # Add this line to declare the node orb

+commands:
+  setup_google_dns:
+    steps:
+      - run:
+          name: "Configure Google DNS"
+          command: |
+            # Backup original resolv.conf
+            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
+            # Add both local and Google DNS servers
+            echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
+            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
+            echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
+
 jobs:
   local_testing:
@@ -15,7 +27,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -134,7 +146,7 @@ jobs:
     steps:
      - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -234,7 +246,13 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
+      - run:
+          name: DNS lookup for Redis host
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y dnsutils
+            dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |
@@ -334,6 +352,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -388,6 +407,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
      - run:
           name: Install Dependencies
           command: |
@@ -429,6 +449,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -479,7 +500,13 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install postgresql postgresql-contrib
+            echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -534,6 +561,7 @@ jobs:
            pip install "diskcache==5.6.1"
            pip install "Pillow==10.3.0"
            pip install "jsonschema==4.22.0"
+            pip install "pytest-postgresql==7.0.1"
      - save_cache:
          paths:
            - ./venv
@@ -569,7 +597,7 @@ jobs:
        - litellm_proxy_unit_tests_coverage
  litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
    docker:
-      - image: cimg/python:3.11
+      - image: cimg/python:3.13.1
        auth:
          username: ${DOCKERHUB_USERNAME}
          password: ${DOCKERHUB_PASSWORD}
@@ -577,6 +605,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -618,6 +647,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -654,6 +684,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -696,6 +727,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -740,6 +772,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -782,6 +815,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -828,6 +862,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -872,6 +907,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -918,6 +954,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -960,6 +997,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1002,6 +1040,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1048,6 +1087,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1080,6 +1120,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1104,6 +1145,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       # Install Helm
       - run:
           name: Install Helm
@@ -1173,6 +1215,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1209,6 +1252,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Python 3.9
           command: |
@@ -1283,6 +1327,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1418,6 +1463,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1542,6 +1588,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1704,6 +1751,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1815,6 +1863,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1897,6 +1946,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       # Remove Docker CLI installation since it's already available in machine executor
       - run:
           name: Install Python 3.13
@@ -1994,6 +2044,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -2039,6 +2090,8 @@ jobs:
            pip install "google-cloud-aiplatform==1.59.0"
            pip install "anthropic==0.49.0"
            pip install "langchain_mcp_adapters==0.0.5"
+            pip install "langchain_openai==0.2.1"
+            pip install "langgraph==0.3.18"
            # Run pytest and generate JUnit XML report
      - run:
          name: Build Docker image
@@ -2251,6 +2304,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build UI
           command: |
@@ -2365,6 +2419,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
@@ -2387,6 +2442,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
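For context on how the new command is consumed: a CircleCI command declared at the top level can be referenced as a single step inside any job, which is why every hunk above adds the same `- setup_google_dns` line right after `- checkout`. A minimal sketch of a consuming job (the job name, executor image, and final test step are illustrative, not part of this commit):

```yaml
# Illustrative sketch only - job name and test command are assumptions.
version: 2.1

commands:
  setup_google_dns:
    steps:
      - run:
          name: "Configure Google DNS"
          command: |
            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf

jobs:
  example_job:               # hypothetical job name
    docker:
      - image: cimg/python:3.13.1
    steps:
      - checkout
      - setup_google_dns     # reuse the shared command, as each job in the diff does
      - run: pytest -x       # placeholder for the job's real work
```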
.github/workflows/publish-migrations.yml (new file, 206 lines)

@@ -0,0 +1,206 @@
name: Publish Prisma Migrations

permissions:
  contents: write
  pull-requests: write

on:
  push:
    paths:
      - 'schema.prisma' # Check root schema.prisma
    branches:
      - main

jobs:
  publish-migrations:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_DB: temp_db
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

      # Add shadow database service
      postgres_shadow:
        image: postgres:14
        env:
          POSTGRES_DB: shadow_db
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5433:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install prisma
          pip install python-dotenv

      - name: Generate Initial Migration if None Exists
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          mkdir -p deploy/migrations
          echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml

          if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
            echo "No existing migrations found, creating baseline..."
            VERSION=$(date +%Y%m%d%H%M%S)
            mkdir -p deploy/migrations/${VERSION}_initial

            echo "Generating initial migration..."
            # Save raw output for debugging
            prisma migrate diff \
              --from-empty \
              --to-schema-datamodel schema.prisma \
              --shadow-database-url "${SHADOW_DATABASE_URL}" \
              --script > deploy/migrations/${VERSION}_initial/raw_migration.sql

            echo "Raw migration file content:"
            cat deploy/migrations/${VERSION}_initial/raw_migration.sql

            echo "Cleaning migration file..."
            # Clean the file
            sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql

            # Verify the migration file
            if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
              echo "ERROR: Migration file is empty after cleaning"
              echo "Original content was:"
              cat deploy/migrations/${VERSION}_initial/raw_migration.sql
              exit 1
            fi

            echo "Final migration file content:"
            cat deploy/migrations/${VERSION}_initial/migration.sql

            # Verify it starts with SQL
            if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
              echo "ERROR: Migration file does not start with SQL command or comment"
              echo "First line is:"
              head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
              echo "Full content is:"
              cat deploy/migrations/${VERSION}_initial/migration.sql
              exit 1
            fi

            echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
          fi

      - name: Compare and Generate Migration
        if: success()
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          # Create temporary migration workspace
          mkdir -p temp_migrations

          # Copy existing migrations (will not fail if directory is empty)
          cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true

          VERSION=$(date +%Y%m%d%H%M%S)

          # Generate diff against existing migrations or empty state
          prisma migrate diff \
            --from-migrations temp_migrations \
            --to-schema-datamodel schema.prisma \
            --shadow-database-url "${SHADOW_DATABASE_URL}" \
            --script > temp_migrations/migration_${VERSION}.sql

          # Check if there are actual changes
          if [ -s temp_migrations/migration_${VERSION}.sql ]; then
            echo "Changes detected, creating new migration"
            mkdir -p deploy/migrations/${VERSION}_schema_update
            mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
            echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
          else
            echo "No schema changes detected"
            exit 0
          fi

      - name: Verify Migration
        if: success()
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          # Create test database
          psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'

          # Apply all migrations in order to verify
          for migration in deploy/migrations/*/migration.sql; do
            echo "Applying migration: $migration"
            psql "${SHADOW_DATABASE_URL}" -f $migration
          done

      # Add this step before create-pull-request to debug permissions
      - name: Check Token Permissions
        run: |
          echo "Checking token permissions..."
          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
            -H "Accept: application/vnd.github.v3+json" \
            https://api.github.com/repos/BerriAI/litellm/collaborators

          echo "\nChecking if token can create PRs..."
          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
            -H "Accept: application/vnd.github.v3+json" \
            https://api.github.com/repos/BerriAI/litellm

      # Add this debug step before git push
      - name: Debug Changed Files
        run: |
          echo "Files staged for commit:"
          git diff --name-status --staged

          echo "\nAll changed files:"
          git status

      - name: Create Pull Request
        if: success()
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "chore: update prisma migrations"
          title: "Update Prisma Migrations"
          body: |
            Auto-generated migration based on schema.prisma changes.

            Generated files:
            - deploy/migrations/${VERSION}_schema_update/migration.sql
            - deploy/migrations/${VERSION}_schema_update/README.md
          branch: feat/prisma-migration-${{ env.VERSION }}
          base: main
          delete-branch: true

      - name: Generate and Save Migrations
        run: |
          # Only add migration files
          git add deploy/migrations/
          git status # Debug what's being committed
          git commit -m "chore: update prisma migrations"
.github/workflows/test-linting.yml (new file, 53 lines)

@@ -0,0 +1,53 @@
name: LiteLLM Linting

on:
  pull_request:
    branches: [ main ]

jobs:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: |
          poetry install --with dev

      - name: Run Black formatting check
        run: |
          cd litellm
          poetry run black . --check
          cd ..

      - name: Run Ruff linting
        run: |
          cd litellm
          poetry run ruff check .
          cd ..

      - name: Run MyPy type checking
        run: |
          cd litellm
          poetry run mypy . --ignore-missing-imports
          cd ..

      - name: Check for circular imports
        run: |
          cd litellm
          poetry run python ../tests/documentation_tests/test_circular_imports.py
          cd ..

      - name: Check import safety
        run: |
          poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
.github/workflows/test-litellm.yml (new file, 35 lines)

@@ -0,0 +1,35 @@
name: LiteLLM Mock Tests (folder - tests/litellm)

on:
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - uses: actions/checkout@v4

      - name: Thank You Message
        run: |
          echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
          echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: |
          poetry install --with dev,proxy-dev --extras proxy
          poetry run pip install pytest-xdist

      - name: Run tests
        run: |
          poetry run pytest tests/litellm -x -vv -n 4
.gitignore (1 change)

@@ -83,4 +83,5 @@ tests/llm_translation/test_vertex_key.json
 litellm/proxy/migrations/0_init/migration.sql
 litellm/proxy/db/migrations/0_init/migration.sql
 litellm/proxy/db/migrations/*
+litellm/proxy/migrations/*config.yaml
 litellm/proxy/migrations/*
.pre-commit-config.yaml

@@ -6,44 +6,35 @@ repos:
       entry: pyright
       language: system
       types: [python]
-      files: ^litellm/
+      files: ^(litellm/|litellm_proxy_extras/)
     - id: isort
       name: isort
       entry: isort
       language: system
       types: [python]
-      files: litellm/.*\.py
+      files: (litellm/|litellm_proxy_extras/).*\.py
       exclude: ^litellm/__init__.py$
-  - repo: https://github.com/psf/black
-    rev: 24.2.0
-    hooks:
     - id: black
+      name: black
+      entry: poetry run black
+      language: system
+      types: [python]
+      files: (litellm/|litellm_proxy_extras/).*\.py
   - repo: https://github.com/pycqa/flake8
     rev: 7.0.0  # The version of flake8 to use
     hooks:
     - id: flake8
       exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
       additional_dependencies: [flake8-print]
-      files: litellm/.*\.py
+      files: (litellm/|litellm_proxy_extras/).*\.py
-  # - id: flake8
-  #   name: flake8 (router.py function length)
-  #   files: ^litellm/router\.py$
-  #   args: [--max-function-length=40]
-  #   # additional_dependencies: [flake8-functions]
   - repo: https://github.com/python-poetry/poetry
     rev: 1.8.0
     hooks:
     - id: poetry-check
+      files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
   - repo: local
     hooks:
     - id: check-files-match
       name: Check if files match
       entry: python3 ci_cd/check_files_match.py
       language: system
-  # - id: check-file-length
-  #   name: Check file length
-  #   entry: python check_file_length.py
-  #   args: ["10000"]  # set your desired maximum number of lines
-  #   language: python
-  #   files: litellm/.*\.py
-  #   exclude: ^litellm/tests/
Makefile (3 changes)

@@ -14,6 +14,9 @@ help:
 install-dev:
 	poetry install --with dev

+install-proxy-dev:
+	poetry install --with dev,proxy-dev
+
 lint: install-dev
 	poetry run pip install types-requests types-setuptools types-redis types-PyYAML
 	cd litellm && poetry run mypy . --ignore-missing-imports
README.md

@@ -16,9 +16,6 @@
     <a href="https://pypi.org/project/litellm/" target="_blank">
         <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
     </a>
-    <a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
-        <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
-    </a>
     <a href="https://www.ycombinator.com/companies/berriai">
         <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
     </a>
ci_cd/baseline_db.py (new file, 60 lines)

@@ -0,0 +1,60 @@
import subprocess
from pathlib import Path
from datetime import datetime


def create_baseline():
    """Create baseline migration in deploy/migrations"""
    try:
        # Get paths
        root_dir = Path(__file__).parent.parent
        deploy_dir = root_dir / "deploy"
        migrations_dir = deploy_dir / "migrations"
        schema_path = root_dir / "schema.prisma"

        # Create migrations directory
        migrations_dir.mkdir(parents=True, exist_ok=True)

        # Create migration_lock.toml if it doesn't exist
        lock_file = migrations_dir / "migration_lock.toml"
        if not lock_file.exists():
            lock_file.write_text('provider = "postgresql"\n')

        # Create timestamp-based migration directory
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        migration_dir = migrations_dir / f"{timestamp}_baseline"
        migration_dir.mkdir(parents=True, exist_ok=True)

        # Generate migration SQL
        result = subprocess.run(
            [
                "prisma",
                "migrate",
                "diff",
                "--from-empty",
                "--to-schema-datamodel",
                str(schema_path),
                "--script",
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Write the SQL to migration.sql
        migration_file = migration_dir / "migration.sql"
        migration_file.write_text(result.stdout)

        print(f"Created baseline migration in {migration_dir}")
        return True

    except subprocess.CalledProcessError as e:
        print(f"Error running prisma command: {e.stderr}")
        return False
    except Exception as e:
        print(f"Error creating baseline migration: {str(e)}")
        return False


if __name__ == "__main__":
    create_baseline()
ci_cd/publish-proxy-extras.sh (new file, 19 lines)

@@ -0,0 +1,19 @@
#!/bin/bash

# Exit on error
set -e

echo "🚀 Building and publishing litellm-proxy-extras"

# Navigate to litellm-proxy-extras directory
cd "$(dirname "$0")/../litellm-proxy-extras"

# Build the package
echo "📦 Building package..."
poetry build

# Publish to PyPI
echo "🌎 Publishing to PyPI..."
poetry publish

echo "✅ Done! Package published successfully"
ci_cd/run_migration.py (new file, 95 lines)

@@ -0,0 +1,95 @@
import os
import subprocess
from pathlib import Path
from datetime import datetime
import testing.postgresql
import shutil


def create_migration(migration_name: str = None):
    """
    Create a new migration SQL file in the migrations directory by comparing
    current database state with schema

    Args:
        migration_name (str): Name for the migration
    """
    try:
        # Get paths
        root_dir = Path(__file__).parent.parent
        migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
        schema_path = root_dir / "schema.prisma"

        # Create temporary PostgreSQL database
        with testing.postgresql.Postgresql() as postgresql:
            db_url = postgresql.url()

            # Create temporary migrations directory next to schema.prisma
            temp_migrations_dir = schema_path.parent / "migrations"

            try:
                # Copy existing migrations to temp directory
                if temp_migrations_dir.exists():
                    shutil.rmtree(temp_migrations_dir)
                shutil.copytree(migrations_dir, temp_migrations_dir)

                # Apply existing migrations to temp database
                os.environ["DATABASE_URL"] = db_url
                subprocess.run(
                    ["prisma", "migrate", "deploy", "--schema", str(schema_path)],
                    check=True,
                )

                # Generate diff between current database and schema
                result = subprocess.run(
                    [
                        "prisma",
                        "migrate",
                        "diff",
                        "--from-url",
                        db_url,
                        "--to-schema-datamodel",
                        str(schema_path),
                        "--script",
                    ],
                    capture_output=True,
                    text=True,
                    check=True,
                )

                if result.stdout.strip():
                    # Generate timestamp and create migration directory
                    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    migration_name = migration_name or "unnamed_migration"
                    migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
                    migration_dir.mkdir(parents=True, exist_ok=True)

                    # Write the SQL to migration.sql
                    migration_file = migration_dir / "migration.sql"
                    migration_file.write_text(result.stdout)

                    print(f"Created migration in {migration_dir}")
                    return True
                else:
                    print("No schema changes detected. Migration not needed.")
                    return False

            finally:
                # Clean up: remove temporary migrations directory
                if temp_migrations_dir.exists():
                    shutil.rmtree(temp_migrations_dir)

    except subprocess.CalledProcessError as e:
        print(f"Error generating migration: {e.stderr}")
        return False
    except Exception as e:
        print(f"Error creating migration: {str(e)}")
        return False


if __name__ == "__main__":
    # If running directly, can optionally pass migration name as argument
    import sys

    migration_name = sys.argv[1] if len(sys.argv) > 1 else None
    create_migration(migration_name)
docker-compose.yml

@@ -1,5 +1,35 @@
 version: "3.11"
 services:
+  litellm:
+    build:
+      context: .
+      args:
+        target: runtime
+    image: ghcr.io/berriai/litellm:main-stable
+    #########################################
+    ## Uncomment these lines to start proxy with a config.yaml file ##
+    # volumes:
+    #  - ./config.yaml:/app/config.yaml <<- this is missing in the docker-compose file currently
+    # command:
+    #  - "--config=/app/config.yaml"
+    ##############################################
+    ports:
+      - "4000:4000" # Map the container port to the host, change the host port if necessary
+    environment:
+      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
+      STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
+    env_file:
+      - .env # Load local .env file
+    depends_on:
+      - db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
+    healthcheck: # Defines the health check configuration for the container
+      test: [ "CMD", "curl", "-f", "http://localhost:4000/health/liveliness || exit 1" ] # Command to execute for health check
+      interval: 30s # Perform health check every 30 seconds
+      timeout: 10s # Health check command times out after 10 seconds
+      retries: 3 # Retry up to 3 times if health check fails
+      start_period: 40s # Wait 40 seconds after container start before beginning health checks
+
 db:
   image: postgres:16
   restart: always
@@ -16,3 +46,23 @@ services:
       interval: 1s
       timeout: 5s
       retries: 10
+
+  prometheus:
+    image: prom/prometheus
+    volumes:
+      - prometheus_data:/prometheus
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+    ports:
+      - "9090:9090"
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--storage.tsdb.retention.time=15d'
+    restart: always
+
+volumes:
+  prometheus_data:
+    driver: local
+  postgres_data:
+    name: litellm_postgres_data # Named volume for Postgres data persistence
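The Prometheus service added above mounts a local `./prometheus.yml`, which is not part of this diff. A minimal sketch of what such a file could look like, assuming the LiteLLM container exposes Prometheus metrics at `/metrics` on port 4000 (the scrape interval and job name below are illustrative assumptions):

```yaml
# prometheus.yml - illustrative sketch, not taken from the repository
global:
  scrape_interval: 30s          # assumed value

scrape_configs:
  - job_name: litellm           # hypothetical job name
    metrics_path: /metrics      # assumes the proxy exposes Prometheus metrics here
    static_configs:
      - targets: ["litellm:4000"]   # the 'litellm' service defined in docker-compose above
```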
docs/my-website/docs/mcp.md

@@ -4,21 +4,177 @@ import Image from '@theme/IdealImage';

 # /mcp [BETA] - Model Context Protocol

-Use Model Context Protocol with LiteLLM
+## Expose MCP tools on LiteLLM Proxy Server
+
+This allows you to define tools that can be called by any MCP compatible client. Define your `mcp_servers` with LiteLLM and all your clients can list and call available tools.

 <Image
-    img={require('../img/litellm_mcp.png')}
+    img={require('../img/mcp_2.png')}
     style={{width: '100%', display: 'block', margin: '2rem auto'}}
 />
 <p style={{textAlign: 'left', color: '#666'}}>
   LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
 </p>

-## Overview
-
-LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
+#### How it works
+
+LiteLLM exposes the following MCP endpoints:
+
+- `/mcp/tools/list` - List all available tools
+- `/mcp/tools/call` - Call a specific tool with the provided arguments
+
+When MCP clients connect to LiteLLM they can follow this workflow:
+
+1. Connect to the LiteLLM MCP server
+2. List all available tools on LiteLLM
+3. Client makes LLM API request with tool call(s)
+4. LLM API returns which tools to call and with what arguments
+5. MCP client makes MCP tool calls to LiteLLM
+6. LiteLLM makes the tool calls to the appropriate MCP server
+7. LiteLLM returns the tool call results to the MCP client
+
+#### Usage
+
+#### 1. Define your tools under `mcp_servers` in your config.yaml file.
+
+LiteLLM allows you to define your tools in the `mcp_servers` section of your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
+
+```yaml title="config.yaml" showLineNumbers
+model_list:
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: sk-xxxxxxx
+
+mcp_servers:
+  {
+    "zapier_mcp": {
+      "url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
+    },
+    "fetch": {
+      "url": "http://localhost:8000/sse"
+    }
+  }
+```
+
+#### 2. Start LiteLLM Gateway
+
+<Tabs>
+<TabItem value="docker" label="Docker Run">
+
+```shell title="Docker Run" showLineNumbers
+docker run -d \
+  -p 4000:4000 \
+  -e OPENAI_API_KEY=$OPENAI_API_KEY \
+  --name my-app \
+  -v $(pwd)/my_config.yaml:/app/config.yaml \
+  my-app:latest \
+  --config /app/config.yaml \
+  --port 4000 \
+  --detailed_debug \
+```
+
+</TabItem>
+
+<TabItem value="py" label="litellm pip">
+
+```shell title="litellm pip" showLineNumbers
+litellm --config config.yaml --detailed_debug
+```
+
+</TabItem>
+</Tabs>
+
+#### 3. Make an LLM API request
+
+In this example we will do the following:
+
+1. Use MCP client to list MCP tools on LiteLLM Proxy
+2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
+3. Provide the MCP tools to `gpt-4o`
+4. Handle tool call from `gpt-4o`
+5. Convert OpenAI tool call to MCP tool call
+6. Execute tool call on MCP server
+
+```python title="MCP Client List Tools" showLineNumbers
+import asyncio
+from openai import AsyncOpenAI
+from openai.types.chat import ChatCompletionUserMessageParam
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+from litellm.experimental_mcp_client.tools import (
+    transform_mcp_tool_to_openai_tool,
+    transform_openai_tool_call_request_to_mcp_tool_call_request,
+)
+
+
+async def main():
+    # Initialize clients
+
+    # point OpenAI client to LiteLLM Proxy
+    client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
+
+    # Point MCP client to LiteLLM Proxy
+    async with sse_client("http://localhost:4000/mcp/") as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+
+            # 1. List MCP tools on LiteLLM Proxy
+            mcp_tools = await session.list_tools()
+            print("List of MCP tools for MCP server:", mcp_tools.tools)
+
+            # Create message
+            messages = [
+                ChatCompletionUserMessageParam(
+                    content="Send an email about LiteLLM supporting MCP", role="user"
+                )
+            ]
+
+            # 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
+            # Since OpenAI only supports tools in the OpenAI format, we need to convert the MCP tools to the OpenAI format.
+            openai_tools = [
+                transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
+            ]
+
+            # 3. Provide the MCP tools to `gpt-4o`
+            response = await client.chat.completions.create(
+                model="gpt-4o",
+                messages=messages,
+                tools=openai_tools,
+                tool_choice="auto",
+            )
+
+            # 4. Handle tool call from `gpt-4o`
+            if response.choices[0].message.tool_calls:
+                tool_call = response.choices[0].message.tool_calls[0]
+                if tool_call:
+                    # 5. Convert OpenAI tool call to MCP tool call
+                    # Since MCP servers expect tools in the MCP format, we need to convert the OpenAI tool call to the MCP format.
+                    # This is done using litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request
+                    mcp_call = (
+                        transform_openai_tool_call_request_to_mcp_tool_call_request(
+                            openai_tool=tool_call.model_dump()
+                        )
+                    )
+
+                    # 6. Execute tool call on MCP server
+                    result = await session.call_tool(
+                        name=mcp_call.name, arguments=mcp_call.arguments
+                    )
+
+                    print("Result:", result)
+
+
+# Run it
+asyncio.run(main())
+```
+
+## LiteLLM Python SDK MCP Bridge
+
+LiteLLM Python SDK acts as an MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP:
+
 - **List** Available MCP Tools: OpenAI clients can view all available MCP tools
   - `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
@@ -26,8 +182,6 @@ LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported mod
   - `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server

-
-## Usage
-
 ### 1. List Available MCP Tools

 In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
@@ -271,215 +425,3 @@ async with stdio_client(server_params) as (read, write):

 </TabItem>
 </Tabs>
-
-## Upcoming Features
-
-:::info
-
-**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
-
-:::
-
-
-### Expose MCP tools on LiteLLM Proxy Server
-
-This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools.
-
-#### How it works
-
-LiteLLM exposes the following MCP endpoints:
-
-- `/mcp/list_tools` - List all available tools
-- `/mcp/call_tool` - Call a specific tool with the provided arguments
-
-When MCP clients connect to LiteLLM they can follow this workflow:
-
-1. Connect to the LiteLLM MCP server
-2. List all available tools on LiteLLM
-3. Client makes LLM API request with tool call(s)
-4. LLM API returns which tools to call and with what arguments
-5. MCP client makes tool calls to LiteLLM
-6. LiteLLM makes the tool calls to the appropriate handlers
-7. LiteLLM returns the tool call results to the MCP client
-
-#### Usage
-
-#### 1. Define your tools on mcp_tools
-
-LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
-
-```yaml
-model_list:
-  - model_name: gpt-4o
-    litellm_params:
-      model: openai/gpt-4o
-      api_key: sk-xxxxxxx
-
-
-mcp_tools:
-  - name: "get_current_time"
-    description: "Get the current time"
-    input_schema: {
-      "type": "object",
-      "properties": {
-        "format": {
-          "type": "string",
-          "description": "The format of the time to return",
-          "enum": ["short"]
-        }
-      }
-    }
-    handler: "mcp_tools.get_current_time"
-```
-
-#### 2. Define a handler for your tool
-
-Create a new file called `mcp_tools.py` and add this code. The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called.
-
-```python
-# mcp_tools.py
-
-from datetime import datetime
-
-
-def get_current_time(format: str = "short"):
-    """
-    Simple handler for the 'get_current_time' tool.
-
-    Args:
-        format (str): The format of the time to return ('short').
-
-    Returns:
-        str: The current time formatted as 'HH:MM'.
-    """
-    # Get the current time
-    current_time = datetime.now()
-
-    # Format the time as 'HH:MM'
-    return current_time.strftime('%H:%M')
-```
-
-#### 3. Start LiteLLM Gateway
-
-<Tabs>
-<TabItem value="docker" label="Docker Run">
-
-Mount your `mcp_tools.py` on the LiteLLM Docker container.
-
-```shell
-docker run -d \
-  -p 4000:4000 \
-  -e OPENAI_API_KEY=$OPENAI_API_KEY \
-  --name my-app \
-  -v $(pwd)/my_config.yaml:/app/config.yaml \
-  -v $(pwd)/mcp_tools.py:/app/mcp_tools.py \
-  my-app:latest \
-  --config /app/config.yaml \
-  --port 4000 \
-  --detailed_debug \
-```
-
-</TabItem>
-
-<TabItem value="py" label="litellm pip">
-
-```shell
-litellm --config config.yaml --detailed_debug
-```
-
-</TabItem>
-</Tabs>
-
-#### 4. Make an LLM API request
-
-```python
-import asyncio
-from langchain_mcp_adapters.tools import load_mcp_tools
-from langchain_openai import ChatOpenAI
-from langgraph.prebuilt import create_react_agent
-from mcp import ClientSession
-from mcp.client.sse import sse_client
-
-
-async def main():
-    # Initialize the model with your API key
-    model = ChatOpenAI(model="gpt-4o")
-
-    # Connect to the MCP server
-    async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
-        async with ClientSession(read, write) as session:
-            # Initialize the session
-            print("Initializing session...")
-            await session.initialize()
-            print("Session initialized")
-
-            # Load available tools from MCP
-            print("Loading tools...")
-            tools = await load_mcp_tools(session)
-            print(f"Loaded {len(tools)} tools")
-
-            # Create a ReAct agent with the model and tools
-            agent = create_react_agent(model, tools)
-
-            # Run the agent with a user query
-            user_query = "What's the weather in Tokyo?"
-            print(f"Asking: {user_query}")
-            agent_response = await agent.ainvoke({"messages": user_query})
-            print("Agent response:")
-            print(agent_response)
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
-```
-
-### Specification for `mcp_tools`
-
-The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients.
-
-#### Tool Definition Format
-
-```yaml
-mcp_tools:
-  - name: string          # Required: Name of the tool
-    description: string   # Required: Description of what the tool does
-    input_schema: object  # Required: JSON Schema defining the tool's input parameters
-    handler: string       # Required: Path to the function that implements the tool
-```
-
-#### Field Details
-
-- `name`: A unique identifier for the tool
-- `description`: A clear description of what the tool does, used by LLMs to determine when to call it
-- `input_schema`: JSON Schema object defining the expected input parameters
-- `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name")
-
-#### Example Tool Definition
-
-```yaml
-mcp_tools:
-  - name: "get_current_time"
-    description: "Get the current time in a specified format"
-    input_schema: {
-      "type": "object",
-      "properties": {
-        "format": {
-          "type": "string",
-          "description": "The format of the time to return",
-          "enum": ["short", "long", "iso"]
-        },
-        "timezone": {
-          "type": "string",
-          "description": "The timezone to use (e.g., 'UTC', 'America/New_York')",
-          "default": "UTC"
-        }
-      },
-      "required": ["format"]
-    }
-    handler: "mcp_tools.get_current_time"
-```
@ -664,6 +664,58 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
## Usage - Latency Optimized Inference
|
||||||
|
|
||||||
|
Valid from v1.65.1+
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
performanceConfig={"latency": "optimized"},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock-claude-3-7
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||||
|
performanceConfig: {"latency": "optimized"} # 👈 EITHER HERE OR ON REQUEST
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "bedrock-claude-3-7",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"performanceConfig": {"latency": "optimized"} # 👈 EITHER HERE OR ON CONFIG.YAML
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
## Usage - Bedrock Guardrails
|
## Usage - Bedrock Guardrails
|
||||||
|
|
||||||
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
|
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
|
||||||
|
@ -1776,6 +1828,7 @@ response = completion(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
1. Setup config.yaml
|
1. Setup config.yaml
|
||||||
|
@ -1820,11 +1873,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
### SSO Login (AWS Profile)
|
### SSO Login (AWS Profile)
|
||||||
- Set `AWS_PROFILE` environment variable
|
- Set `AWS_PROFILE` environment variable
|
||||||
- Make bedrock completion call
|
- Make bedrock completion call
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
@ -1917,12 +1972,46 @@ model_list:
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
Text to Image :
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H "Authorization: Bearer $LITELLM_VIRTUAL_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Color Guided Generation:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H "Authorization: Bearer $LITELLM_VIRTUAL_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter",
|
||||||
|
"taskType": "COLOR_GUIDED_GENERATION",
|
||||||
|
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|-------------------------|---------------------------------------------|
|
||||||
|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
|
||||||
|
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
|
||||||
|
|
||||||
|
|
||||||
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
||||||
|
|
||||||
|
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
:::warning
|
:::warning
|
||||||
|
|
||||||
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Experimental - 2024-Jun-23:
|
Experimental - 2024-Jun-23:
|
||||||
|
|
|
@ -589,8 +589,10 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the audio."},
|
{"type": "text", "text": "Please summarize the audio."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
|
"file": {
|
||||||
|
"file_data": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -640,8 +642,11 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the file."},
|
{"type": "text", "text": "Please summarize the file."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "https://storage..." # 👈 SET THE IMG URL
|
"file": {
|
||||||
|
"file_id": "https://storage...", # 👈 SET THE IMG URL
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -668,8 +673,11 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the file."},
|
{"type": "text", "text": "Please summarize the file."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://..." # 👈 SET THE cloud storage bucket url
|
"file": {
|
||||||
|
"file_id": "gs://storage...", # 👈 SET THE IMG URL
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
## OpenAI Audio Transcription
|
||||||
|
|
||||||
|
LiteLLM supports the OpenAI Audio Transcription endpoint.
|
||||||
|
|
||||||
|
Supported models:
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|---------------------------|-----------------------------------------------------------------|
|
||||||
|
| `whisper-1` | `response = transcription(model="whisper-1", file=audio_file)` |
|
||||||
|
| `gpt-4o-transcribe` | `response = transcription(model="gpt-4o-transcribe", file=audio_file)` |
|
||||||
|
| `gpt-4o-mini-transcribe` | `response = transcription(model="gpt-4o-mini-transcribe", file=audio_file)` |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import transcription
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set api keys
|
||||||
|
os.environ["OPENAI_API_KEY"] = ""
|
||||||
|
audio_file = open("/path/to/audio.mp3", "rb")
|
||||||
|
|
||||||
|
response = transcription(model="gpt-4o-transcribe", file=audio_file)
|
||||||
|
|
||||||
|
print(f"response: {response}")
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o-transcribe
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-4o-transcribe
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
model_info:
|
||||||
|
mode: audio_transcription
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:4000/v1/audio/transcriptions' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
|
||||||
|
--form 'model="gpt-4o-transcribe"'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Advanced
|
## Advanced
|
||||||
|
|
||||||
### Getting OpenAI API Response Headers
|
### Getting OpenAI API Response Headers
|
||||||
|
|
|
@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Gemini Pro
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|------------------|--------------------------------------|
|
||||||
|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||||
|
|
||||||
|
## Fine-tuned Models
|
||||||
|
|
||||||
|
You can call fine-tuned Vertex AI Gemini models through LiteLLM
|
||||||
|
|
||||||
|
| Property | Details |
|
||||||
|
|----------|---------|
|
||||||
|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
|
||||||
|
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
|
||||||
|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
|
||||||
|
|
||||||
|
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
|
||||||
|
|
||||||
|
```python title="Model parameter for calling fine-tuned gemini models"
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>"
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="LiteLLM Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
## set ENV variables
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
1. Add Vertex Credentials to your env
|
||||||
|
|
||||||
|
```bash title="Authenticate to Vertex AI"
|
||||||
|
!gcloud auth application-default login
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="Add to litellm config"
|
||||||
|
- model_name: finetuned-gemini
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini/<ENDPOINT_ID>
|
||||||
|
vertex_project: <PROJECT_ID>
|
||||||
|
vertex_location: <LOCATION>
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example request"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-litellm-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="finetuned-gemini",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "hi"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Example request"
|
||||||
|
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: <LITELLM_KEY>' \
|
||||||
|
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Model Garden
|
## Model Garden
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
@ -1479,67 +1576,6 @@ response = completion(
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro
|
|
||||||
| Model Name | Function Call |
|
|
||||||
|------------------|--------------------------------------|
|
|
||||||
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
|
||||||
|
|
||||||
## Fine-tuned Models
|
|
||||||
|
|
||||||
Fine tuned models on vertex have a numerical model/endpoint id.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
## set ENV variables
|
|
||||||
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
|
||||||
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add Vertex Credentials to your env
|
|
||||||
|
|
||||||
```bash
|
|
||||||
!gcloud auth application-default login
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
- model_name: finetuned-gemini
|
|
||||||
litellm_params:
|
|
||||||
model: vertex_ai/<ENDPOINT_ID>
|
|
||||||
vertex_project: <PROJECT_ID>
|
|
||||||
vertex_location: <LOCATION>
|
|
||||||
model_info:
|
|
||||||
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: <LITELLM_KEY>' \
|
|
||||||
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro Vision
|
## Gemini Pro Vision
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|
@ -1684,23 +1720,25 @@ assert isinstance(
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Usage - PDF / Videos / etc. Files
|
## Usage - PDF / Videos / Audio etc. Files
|
||||||
|
|
||||||
Pass any file supported by Vertex AI, through LiteLLM.
|
Pass any file supported by Vertex AI, through LiteLLM.
|
||||||
|
|
||||||
LiteLLM Supports the following image types passed in url
|
LiteLLM supports the following file types passed in a url.
|
||||||
|
|
||||||
|
Using `file` message type for VertexAI is live from v1.65.1+
|
||||||
|
|
||||||
```
|
```
|
||||||
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
Files with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
||||||
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
Files with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||||
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
||||||
Base64 Encoded Local Images
|
Base64 Encoded Local Files
|
||||||
```
|
```
|
||||||
|
|
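The base64 examples further below interpolate a variable named `encoded_file` into a `data:` URL. A minimal sketch of how that value can be produced (the file path is just an example):

```python
import base64

# Read a local file and base64-encode it for the `file_data` field used in the
# base64 examples below.
with open("2403.05530.pdf", "rb") as f:
    encoded_file = base64.b64encode(f.read()).decode("utf-8")
```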
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
### **Using `gs://`**
|
### **Using `gs://` or any URL**
|
||||||
```python
|
```python
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
|
||||||
|
@ -1712,8 +1750,11 @@ response = completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF
|
"file": {
|
||||||
|
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
|
||||||
|
"format": "application/pdf" # OPTIONAL - specify mime-type
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -1747,8 +1788,16 @@ response = completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "audio_input",
|
||||||
|
"audio_input {
|
||||||
|
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -1794,8 +1843,11 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF
|
"file": {
|
||||||
|
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -1822,10 +1874,17 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
|
"file": {
|
||||||
}
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "audio_input",
|
||||||
|
"audio_input {
|
||||||
|
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
|
||||||
}
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -1836,6 +1895,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Chat Models
|
## Chat Models
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|------------------|--------------------------------------|
|
|------------------|--------------------------------------|
|
||||||
|
@ -2044,7 +2104,12 @@ print(response)
|
||||||
|
|
||||||
## **Multi-Modal Embeddings**
|
## **Multi-Modal Embeddings**
|
||||||
|
|
||||||
Usage
|
|
||||||
|
Known Limitations:
|
||||||
|
- Only supports 1 image / video per request
|
||||||
|
- Only supports GCS or base64 encoded images / videos
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
@ -2260,6 +2325,115 @@ print(f"Text Embedding: {embeddings.text_embedding}")
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
### Text + Image + Video Embeddings
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
Text + Image
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Text + Video
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Image + Video
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM PROXY (Unified Endpoint)">
|
||||||
|
|
||||||
|
1. Add model to config.yaml
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: multimodalembedding@001
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/multimodalembedding@001
|
||||||
|
vertex_project: "adroit-crow-413218"
|
||||||
|
vertex_location: "us-central1"
|
||||||
|
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
drop_params: True
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start Proxy
|
||||||
|
|
||||||
|
```
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make Request use OpenAI Python SDK, Langchain Python SDK
|
||||||
|
|
||||||
|
|
||||||
|
Text + Image
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
Text + Video
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
Image + Video
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## **Image Generation Models**
|
## **Image Generation Models**
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
|
|
|
@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
|
||||||
- Login: `<your-proxy-base-url>/sso/key/generate`
|
- Login: `<your-proxy-base-url>/sso/key/generate`
|
||||||
- Logout: `<your-proxy-base-url>`
|
- Logout: `<your-proxy-base-url>`
|
||||||
|
|
||||||
|
Here's the env var to set the logout url on the proxy
|
||||||
|
```bash
|
||||||
|
PROXY_LOGOUT_URL="https://www.google.com"
|
||||||
|
```
|
||||||
|
|
||||||
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
||||||
|
|
||||||
Set this in your .env (so the proxy can set the correct redirect url)
|
Set this in your .env (so the proxy can set the correct redirect url)
|
||||||
|
|
|
@ -160,7 +160,7 @@ general_settings:
|
||||||
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
||||||
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
|
||||||
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
||||||
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
||||||
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
||||||
|
@ -479,7 +479,7 @@ router_settings:
|
||||||
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
||||||
| PROXY_BASE_URL | Base URL for proxy service
|
| PROXY_BASE_URL | Base URL for proxy service
|
||||||
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
||||||
| PROXY_MASTER_KEY | Master key for proxy authentication
|
| LITELLM_MASTER_KEY | Master key for proxy authentication
|
||||||
| QDRANT_API_BASE | Base URL for Qdrant API
|
| QDRANT_API_BASE | Base URL for Qdrant API
|
||||||
| QDRANT_API_KEY | API key for Qdrant service
|
| QDRANT_API_KEY | API key for Qdrant service
|
||||||
| QDRANT_URL | Connection URL for Qdrant database
|
| QDRANT_URL | Connection URL for Qdrant database
|
||||||
|
@ -515,4 +515,5 @@ router_settings:
|
||||||
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
||||||
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
||||||
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
||||||
|
| USE_PRISMA_MIGRATE | Flag to use prisma migrate instead of prisma db push. Recommended for production environments.
|
||||||
| WEBHOOK_URL | URL for receiving webhooks from external services
|
| WEBHOOK_URL | URL for receiving webhooks from external services
|
||||||
|
|
|
@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
|
||||||
|
|
||||||
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
||||||
|
|
||||||
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability.
|
When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
|
||||||
|
|
||||||
|
|
||||||
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
||||||
|
|
||||||
```yaml
|
#### Configuration
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="litellm config.yaml"
|
||||||
general_settings:
|
general_settings:
|
||||||
allow_requests_on_db_unavailable: True
|
allow_requests_on_db_unavailable: True
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Expected Behavior
|
||||||
|
|
||||||
|
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM will handle errors as follows:
|
||||||
|
|
||||||
|
| Type of Error | Expected Behavior | Details |
|
||||||
|
|---------------|-------------------|----------------|
|
||||||
|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
|
||||||
|
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
|
||||||
|
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
|
||||||
|
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
|
||||||
|
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
|
||||||
|
|
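A quick way to confirm this is to call the readiness endpoint while the database is unreachable. A minimal sketch, assuming the proxy runs on its default port and the `requests` package is installed:

```python
import requests

# With allow_requests_on_db_unavailable: True, readiness should stay healthy
# even while the DB is down.
resp = requests.get("http://0.0.0.0:4000/health/readiness")
print(resp.status_code)  # expected: 200
```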
||||||
|
|
||||||
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
||||||
|
|
||||||
By default, LiteLLM writes several types of logs to the database:
|
By default, LiteLLM writes several types of logs to the database:
|
||||||
|
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
|
||||||
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Machine Specifications to Deploy LiteLLM
|
|
||||||
|
|
||||||
| Service | Spec | CPUs | Memory | Architecture | Version|
|
|
||||||
| --- | --- | --- | --- | --- | --- |
|
|
||||||
| Server | `t2.small`. | `1vCPUs` | `8GB` | `x86` |
|
|
||||||
| Redis Cache | - | - | - | - | 7.0+ Redis Engine|
|
|
||||||
|
|
||||||
|
|
||||||
### Reference Kubernetes Deployment YAML
|
|
||||||
|
|
||||||
Reference Kubernetes `deployment.yaml` that was load tested by us
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: litellm-deployment
|
|
||||||
spec:
|
|
||||||
replicas: 3
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: litellm
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: litellm
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: litellm-container
|
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
env:
|
|
||||||
- name: AZURE_API_KEY
|
|
||||||
value: "d6******"
|
|
||||||
- name: AZURE_API_BASE
|
|
||||||
value: "https://ope******"
|
|
||||||
- name: LITELLM_MASTER_KEY
|
|
||||||
value: "sk-1234"
|
|
||||||
- name: DATABASE_URL
|
|
||||||
value: "po**********"
|
|
||||||
args:
|
|
||||||
- "--config"
|
|
||||||
- "/app/proxy_config.yaml" # Update the path to mount the config file
|
|
||||||
volumeMounts: # Define volume mount for proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /app
|
|
||||||
readOnly: true
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/liveliness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/readiness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
volumes: # Define volume to mount proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: litellm-config
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Reference Kubernetes `service.yaml` that was load tested by us
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: litellm-service
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: litellm
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 4000
|
|
||||||
targetPort: 4000
|
|
||||||
type: LoadBalancer
|
|
||||||
```
|
|
||||||
|
|
|
@ -188,7 +188,13 @@ Currently implemented for:
|
||||||
- OpenAI (if OPENAI_API_KEY is set)
|
- OpenAI (if OPENAI_API_KEY is set)
|
||||||
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
||||||
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
||||||
|
- Gemini (if GEMINI_API_KEY is set)
|
||||||
|
- XAI (if XAI_API_KEY is set)
|
||||||
|
- Anthropic (if ANTHROPIC_API_KEY is set)
|
||||||
|
|
||||||
|
You can also specify a custom provider to check:
|
||||||
|
|
||||||
|
**All providers**:
|
||||||
```python
|
```python
|
||||||
from litellm import get_valid_models
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
|
||||||
print(valid_models)
|
print(valid_models)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Specific provider**:
|
||||||
|
```python
|
||||||
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
|
||||||
|
print(valid_models)
|
||||||
|
```
|
||||||
|
|
||||||
### `validate_environment(model: str)`
|
### `validate_environment(model: str)`
|
||||||
|
|
||||||
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
||||||
|
|
|
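A minimal sketch of calling it (the exact shape of the returned dict may vary by version):

```python
from litellm import validate_environment

# Check whether the env vars needed for a given model are set.
env_check = validate_environment(model="gpt-3.5-turbo")
print(env_check)  # e.g. {'keys_in_environment': False, 'missing_keys': ['OPENAI_API_KEY']}
```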
@ -98,6 +98,5 @@ On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
|
||||||
|
|
||||||
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
- Running LiteLLM and OpenWebUI on Windows Localhost: A Comprehensive Guide [https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/](https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/)
|
||||||
|
|
BIN
docs/my-website/img/mcp_2.png
Normal file
BIN
docs/my-website/img/mcp_2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 133 KiB |
BIN
docs/my-website/img/mcp_ui.png
Normal file
BIN
docs/my-website/img/mcp_ui.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 93 KiB |
BIN
docs/my-website/img/release_notes/mcp_ui.png
Normal file
BIN
docs/my-website/img/release_notes/mcp_ui.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 237 KiB |
BIN
docs/my-website/img/release_notes/team_model_add.png
Normal file
BIN
docs/my-website/img/release_notes/team_model_add.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 70 KiB |
BIN
docs/my-website/img/release_notes/ui_usage.png
Normal file
BIN
docs/my-website/img/release_notes/ui_usage.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 66 KiB |
|
@ -24,6 +24,7 @@ This release brings:
|
||||||
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
||||||
- Perf improvements for Usage-based Routing
|
- Perf improvements for Usage-based Routing
|
||||||
- Streaming guardrail support via websockets
|
- Streaming guardrail support via websockets
|
||||||
|
- Azure OpenAI client perf fix (from previous release)
|
||||||
|
|
||||||
## Docker Run LiteLLM Proxy
|
## Docker Run LiteLLM Proxy
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ This release brings:
|
||||||
docker run
|
docker run
|
||||||
-e STORE_MODEL_IN_DB=True
|
-e STORE_MODEL_IN_DB=True
|
||||||
-p 4000:4000
|
-p 4000:4000
|
||||||
ghcr.io/berriai/litellm:main-v1.63.14-stable
|
ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Demo Instance
|
## Demo Instance
|
||||||
|
|
160
docs/my-website/release_notes/v1.65.0-stable/index.md
Normal file
160
docs/my-website/release_notes/v1.65.0-stable/index.md
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0-stable - Model Context Protocol
|
||||||
|
slug: v1.65.0-stable
|
||||||
|
date: 2025-03-30T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [mcp, custom_prompt_management]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0-stable is live now. Here are the key highlights of this release:
|
||||||
|
- **MCP Support**: Support for adding and using MCP servers on the LiteLLM proxy.
|
||||||
|
- **UI view total usage after 1M+ logs**: You can now view usage analytics after crossing 1M+ logs in DB.
|
||||||
|
|
||||||
|
## Model Context Protocol (MCP)
|
||||||
|
|
||||||
|
This release introduces support for centrally adding MCP servers on LiteLLM. This allows you to add MCP server endpoints and your developers can `list` and `call` MCP tools through LiteLLM.
|
||||||
|
|
||||||
|
Read more about MCP [here](https://docs.litellm.ai/docs/mcp).
|
||||||
|
|
||||||
|
<Image
|
||||||
|
img={require('../../img/release_notes/mcp_ui.png')}
|
||||||
|
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
||||||
|
/>
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
Expose and use MCP servers through LiteLLM
|
||||||
|
</p>
|
||||||
|
|
||||||
|
## UI view total usage after 1M+ logs
|
||||||
|
|
||||||
|
This release brings the ability to view total usage analytics even after exceeding 1M+ logs in your database. We've implemented a scalable architecture that stores only aggregate usage data, resulting in significantly more efficient queries and reduced database CPU utilization.
|
||||||
|
|
||||||
|
|
||||||
|
<Image
|
||||||
|
img={require('../../img/release_notes/ui_usage.png')}
|
||||||
|
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
||||||
|
/>
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
View total usage after 1M+ logs
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
- How this works:
|
||||||
|
- We now aggregate usage data into a dedicated DailyUserSpend table, significantly reducing query load and CPU usage even beyond 1M+ logs.
|
||||||
|
|
||||||
|
- Daily Spend Breakdown API:
|
||||||
|
|
||||||
|
- Retrieve granular daily usage data (by model, provider, and API key) with a single endpoint.
|
||||||
|
Example Request:
|
||||||
|
|
||||||
|
```shell title="Daily Spend Breakdown API" showLineNumbers
|
||||||
|
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
|
||||||
|
-H 'Authorization: Bearer sk-...'
|
||||||
|
```
|
||||||
|
|
||||||
|
```json title="Daily Spend Breakdown API Response" showLineNumbers
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"date": "2025-03-27",
|
||||||
|
"metrics": {
|
||||||
|
"spend": 0.0177072,
|
||||||
|
"prompt_tokens": 111,
|
||||||
|
"completion_tokens": 1711,
|
||||||
|
"total_tokens": 1822,
|
||||||
|
"api_requests": 11
|
||||||
|
},
|
||||||
|
"breakdown": {
|
||||||
|
"models": {
|
||||||
|
"gpt-4o-mini": {
|
||||||
|
"spend": 1.095e-05,
|
||||||
|
"prompt_tokens": 37,
|
||||||
|
"completion_tokens": 9,
|
||||||
|
"total_tokens": 46,
|
||||||
|
"api_requests": 1
|
||||||
|
},
|
||||||
|
"providers": { "openai": { ... }, "azure_ai": { ... } },
|
||||||
|
"api_keys": { "3126b6eaf1...": { ... } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"total_spend": 0.7274667,
|
||||||
|
"total_prompt_tokens": 280990,
|
||||||
|
"total_completion_tokens": 376674,
|
||||||
|
"total_api_requests": 14
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## New Models / Updated Models
|
||||||
|
- Support for Vertex AI gemini-2.0-flash-lite & Google AI Studio gemini-2.0-flash-lite [PR](https://github.com/BerriAI/litellm/pull/9523)
|
||||||
|
- Support for Vertex AI Fine-Tuned LLMs [PR](https://github.com/BerriAI/litellm/pull/9542)
|
||||||
|
- Nova Canvas image generation support [PR](https://github.com/BerriAI/litellm/pull/9525)
|
||||||
|
- OpenAI gpt-4o-transcribe support [PR](https://github.com/BerriAI/litellm/pull/9517)
|
||||||
|
- Added new Vertex AI text embedding model [PR](https://github.com/BerriAI/litellm/pull/9476)
|
||||||
|
|
||||||
|
## LLM Translation
|
||||||
|
- OpenAI Web Search Tool Call Support [PR](https://github.com/BerriAI/litellm/pull/9465)
|
||||||
|
- Vertex AI topLogprobs support [PR](https://github.com/BerriAI/litellm/pull/9518)
|
||||||
|
- Support for sending images and video to Vertex AI multimodal embedding [Doc](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings)
|
||||||
|
- Support litellm.api_base for Vertex AI + Gemini across completion, embedding, image_generation [PR](https://github.com/BerriAI/litellm/pull/9516)
|
||||||
|
- Bug fix for returning `response_cost` when using litellm python SDK with LiteLLM Proxy [PR](https://github.com/BerriAI/litellm/commit/6fd18651d129d606182ff4b980e95768fc43ca3d)
|
||||||
|
- Support for `max_completion_tokens` on Mistral API [PR](https://github.com/BerriAI/litellm/pull/9606)
|
||||||
|
- Refactored Vertex AI passthrough routes - fixes unpredictable behaviour with auto-setting default_vertex_region on router model add [PR](https://github.com/BerriAI/litellm/pull/9467)
|
||||||
|
|
||||||
|
## Spend Tracking Improvements
|
||||||
|
- Log 'api_base' on spend logs [PR](https://github.com/BerriAI/litellm/pull/9509)
|
||||||
|
- Support for Gemini audio token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
|
||||||
|
- Fixed OpenAI audio input token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
|
||||||
|
|
||||||
|
## UI
|
||||||
|
|
||||||
|
### Model Management
|
||||||
|
- Allowed team admins to add/update/delete models on UI [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
- Added render supports_web_search on model hub [PR](https://github.com/BerriAI/litellm/pull/9469)
|
||||||
|
|
||||||
|
### Request Logs
|
||||||
|
- Show API base and model ID on request logs [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
- Allow viewing keyinfo on request logs [PR](https://github.com/BerriAI/litellm/pull/9568)
|
||||||
|
|
||||||
|
### Usage Tab
|
||||||
|
- Added Daily User Spend Aggregate view - allows UI Usage tab to work > 1m rows [PR](https://github.com/BerriAI/litellm/pull/9538)
|
||||||
|
- Connected UI to "LiteLLM_DailyUserSpend" spend table [PR](https://github.com/BerriAI/litellm/pull/9603)
|
||||||
|
|
||||||
|
## Logging Integrations
|
||||||
|
- Fixed StandardLoggingPayload for GCS Pub Sub Logging Integration [PR](https://github.com/BerriAI/litellm/pull/9508)
|
||||||
|
- Track `litellm_model_name` on `StandardLoggingPayload` [Docs](https://docs.litellm.ai/docs/proxy/logging_spec#standardlogginghiddenparams)
|
||||||
|
|
||||||
|
## Performance / Reliability Improvements
|
||||||
|
- LiteLLM Redis semantic caching implementation [PR](https://github.com/BerriAI/litellm/pull/9356)
|
||||||
|
- Gracefully handle exceptions when DB is having an outage [PR](https://github.com/BerriAI/litellm/pull/9533)
|
||||||
|
- Allow Pods to startup + passing /health/readiness when allow_requests_on_db_unavailable: True and DB is down [PR](https://github.com/BerriAI/litellm/pull/9569)
|
||||||
|
|
||||||
|
|
||||||
|
## General Improvements
|
||||||
|
- Support for exposing MCP tools on litellm proxy [PR](https://github.com/BerriAI/litellm/pull/9426)
|
||||||
|
- Support discovering Gemini, Anthropic, xAI models by calling their /v1/model endpoint [PR](https://github.com/BerriAI/litellm/pull/9530)
|
||||||
|
- Fixed route check for non-proxy admins on JWT auth [PR](https://github.com/BerriAI/litellm/pull/9454)
|
||||||
|
- Added baseline Prisma database migrations [PR](https://github.com/BerriAI/litellm/pull/9565)
|
||||||
|
- View all wildcard models on /model/info [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
|
||||||
|
|
||||||
|
## Security
|
||||||
|
- Bumped next from 14.2.21 to 14.2.25 in UI dashboard [PR](https://github.com/BerriAI/litellm/pull/9458)
|
||||||
|
|
||||||
|
## Complete Git Diff
|
||||||
|
|
||||||
|
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.14-stable.patch1...v1.65.0-stable)
|
34
docs/my-website/release_notes/v1.65.0/index.md
Normal file
34
docs/my-website/release_notes/v1.65.0/index.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0 - Team Model Add - update
|
||||||
|
slug: v1.65.0
|
||||||
|
date: 2025-03-28T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [management endpoints, team models, ui]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
|
||||||
|
|
||||||
|
This means that only proxy admins or team admins can create team models.
|
||||||
|
|
||||||
|
## Additional Changes
|
||||||
|
|
||||||
|
- Allows team admins to call `/model/update` to update team models.
|
||||||
|
- Allows team admins to call `/model/delete` to delete team models.
|
||||||
|
- Introduces new `user_models_only` param to `/v2/model/info` - only return models added by this user.
|
||||||
|
|
||||||
|
|
||||||
|
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/team_model_add.png')} />
|
|
@ -304,7 +304,6 @@ const sidebars = {
|
||||||
"image_variations",
|
"image_variations",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"mcp",
|
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "/audio",
|
label: "/audio",
|
||||||
|
|
|
@ -444,9 +444,7 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
|
||||||
|
|
||||||
detected_secrets = []
|
detected_secrets = []
|
||||||
for file in secrets.files:
|
for file in secrets.files:
|
||||||
|
|
||||||
for found_secret in secrets[file]:
|
for found_secret in secrets[file]:
|
||||||
|
|
||||||
if found_secret.secret_value is None:
|
if found_secret.secret_value is None:
|
||||||
continue
|
continue
|
||||||
detected_secrets.append(
|
detected_secrets.append(
|
||||||
|
@ -471,14 +469,12 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
|
||||||
data: dict,
|
data: dict,
|
||||||
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
|
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
|
||||||
):
|
):
|
||||||
|
|
||||||
if await self.should_run_check(user_api_key_dict) is False:
|
if await self.should_run_check(user_api_key_dict) is False:
|
||||||
return
|
return
|
||||||
|
|
||||||
if "messages" in data and isinstance(data["messages"], list):
|
if "messages" in data and isinstance(data["messages"], list):
|
||||||
for message in data["messages"]:
|
for message in data["messages"]:
|
||||||
if "content" in message and isinstance(message["content"], str):
|
if "content" in message and isinstance(message["content"], str):
|
||||||
|
|
||||||
detected_secrets = self.scan_message_for_secrets(message["content"])
|
detected_secrets = self.scan_message_for_secrets(message["content"])
|
||||||
|
|
||||||
for secret in detected_secrets:
|
for secret in detected_secrets:
|
||||||
|
|
26
litellm-proxy-extras/LICENSE
Normal file
26
litellm-proxy-extras/LICENSE
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
Portions of this software are licensed as follows:
|
||||||
|
|
||||||
|
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
|
||||||
|
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
|
||||||
|
---
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 Berri AI
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
21
litellm-proxy-extras/README.md
Normal file
21
litellm-proxy-extras/README.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
Additional files for the proxy. Reduces the size of the main litellm package.
|
||||||
|
|
||||||
|
Currently, this package only stores the migration.sql files for litellm-proxy.
|
||||||
|
|
||||||
|
To install, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install litellm-proxy-extras
|
||||||
|
```
|
||||||
|
OR
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install litellm[proxy] # installs litellm-proxy-extras and other proxy dependencies.
|
||||||
|
```
|
||||||
|
|
||||||
|
To use the migrations, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --use_prisma_migrate
|
||||||
|
```
|
||||||
|
|
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0-py3-none-any.whl
vendored
Normal file
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0.tar.gz
vendored
Normal file
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0.tar.gz
vendored
Normal file
Binary file not shown.
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1-py3-none-any.whl
vendored
Normal file
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1.tar.gz
vendored
Normal file
BIN
litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1.tar.gz
vendored
Normal file
Binary file not shown.
0
litellm-proxy-extras/litellm_proxy_extras/__init__.py
Normal file
0
litellm-proxy-extras/litellm_proxy_extras/__init__.py
Normal file
12
litellm-proxy-extras/litellm_proxy_extras/_logging.py
Normal file
12
litellm-proxy-extras/litellm_proxy_extras/_logging.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Set up package logger
|
||||||
|
logger = logging.getLogger("litellm_proxy_extras")
|
||||||
|
if not logger.handlers: # Only add handler if none exists
|
||||||
|
handler = logging.StreamHandler()
|
||||||
|
formatter = logging.Formatter(
|
||||||
|
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||||
|
)
|
||||||
|
handler.setFormatter(formatter)
|
||||||
|
logger.addHandler(handler)
|
||||||
|
logger.setLevel(logging.INFO)
|
|
@@ -0,0 +1,360 @@
-- CreateTable
CREATE TABLE "LiteLLM_BudgetTable" (
    "budget_id" TEXT NOT NULL,
    "max_budget" DOUBLE PRECISION,
    "soft_budget" DOUBLE PRECISION,
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "model_max_budget" JSONB,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_CredentialsTable" (
    "credential_id" TEXT NOT NULL,
    "credential_name" TEXT NOT NULL,
    "credential_values" JSONB NOT NULL,
    "credential_info" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ProxyModelTable" (
    "model_id" TEXT NOT NULL,
    "model_name" TEXT NOT NULL,
    "litellm_params" JSONB NOT NULL,
    "model_info" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_OrganizationTable" (
    "organization_id" TEXT NOT NULL,
    "organization_alias" TEXT NOT NULL,
    "budget_id" TEXT NOT NULL,
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "models" TEXT[],
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ModelTable" (
    "id" SERIAL NOT NULL,
    "aliases" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
);

-- CreateTable
CREATE TABLE "LiteLLM_TeamTable" (
    "team_id" TEXT NOT NULL,
    "team_alias" TEXT,
    "organization_id" TEXT,
    "admins" TEXT[],
    "members" TEXT[],
    "members_with_roles" JSONB NOT NULL DEFAULT '{}',
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "max_budget" DOUBLE PRECISION,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "models" TEXT[],
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "blocked" BOOLEAN NOT NULL DEFAULT false,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "model_id" INTEGER,

    CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_UserTable" (
    "user_id" TEXT NOT NULL,
    "user_alias" TEXT,
    "team_id" TEXT,
    "sso_user_id" TEXT,
    "organization_id" TEXT,
    "password" TEXT,
    "teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "user_role" TEXT,
    "max_budget" DOUBLE PRECISION,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "user_email" TEXT,
    "models" TEXT[],
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_VerificationToken" (
    "token" TEXT NOT NULL,
    "key_name" TEXT,
    "key_alias" TEXT,
    "soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "expires" TIMESTAMP(3),
    "models" TEXT[],
    "aliases" JSONB NOT NULL DEFAULT '{}',
    "config" JSONB NOT NULL DEFAULT '{}',
    "user_id" TEXT,
    "team_id" TEXT,
    "permissions" JSONB NOT NULL DEFAULT '{}',
    "max_parallel_requests" INTEGER,
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "blocked" BOOLEAN,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "max_budget" DOUBLE PRECISION,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "budget_id" TEXT,
    "organization_id" TEXT,
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT,

    CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
);

-- CreateTable
CREATE TABLE "LiteLLM_EndUserTable" (
    "user_id" TEXT NOT NULL,
    "alias" TEXT,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "allowed_model_region" TEXT,
    "default_model" TEXT,
    "budget_id" TEXT,
    "blocked" BOOLEAN NOT NULL DEFAULT false,

    CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_Config" (
    "param_name" TEXT NOT NULL,
    "param_value" JSONB,

    CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
);

-- CreateTable
CREATE TABLE "LiteLLM_SpendLogs" (
    "request_id" TEXT NOT NULL,
    "call_type" TEXT NOT NULL,
    "api_key" TEXT NOT NULL DEFAULT '',
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "total_tokens" INTEGER NOT NULL DEFAULT 0,
    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
    "startTime" TIMESTAMP(3) NOT NULL,
    "endTime" TIMESTAMP(3) NOT NULL,
    "completionStartTime" TIMESTAMP(3),
    "model" TEXT NOT NULL DEFAULT '',
    "model_id" TEXT DEFAULT '',
    "model_group" TEXT DEFAULT '',
    "custom_llm_provider" TEXT DEFAULT '',
    "api_base" TEXT DEFAULT '',
    "user" TEXT DEFAULT '',
    "metadata" JSONB DEFAULT '{}',
    "cache_hit" TEXT DEFAULT '',
    "cache_key" TEXT DEFAULT '',
    "request_tags" JSONB DEFAULT '[]',
    "team_id" TEXT,
    "end_user" TEXT,
    "requester_ip_address" TEXT,
    "messages" JSONB DEFAULT '{}',
    "response" JSONB DEFAULT '{}',

    CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ErrorLogs" (
    "request_id" TEXT NOT NULL,
    "startTime" TIMESTAMP(3) NOT NULL,
    "endTime" TIMESTAMP(3) NOT NULL,
    "api_base" TEXT NOT NULL DEFAULT '',
    "model_group" TEXT NOT NULL DEFAULT '',
    "litellm_model_name" TEXT NOT NULL DEFAULT '',
    "model_id" TEXT NOT NULL DEFAULT '',
    "request_kwargs" JSONB NOT NULL DEFAULT '{}',
    "exception_type" TEXT NOT NULL DEFAULT '',
    "exception_string" TEXT NOT NULL DEFAULT '',
    "status_code" TEXT NOT NULL DEFAULT '',

    CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_UserNotifications" (
    "request_id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "models" TEXT[],
    "justification" TEXT NOT NULL,
    "status" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_TeamMembership" (
    "user_id" TEXT NOT NULL,
    "team_id" TEXT NOT NULL,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "budget_id" TEXT,

    CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_OrganizationMembership" (
    "user_id" TEXT NOT NULL,
    "organization_id" TEXT NOT NULL,
    "user_role" TEXT,
    "spend" DOUBLE PRECISION DEFAULT 0.0,
    "budget_id" TEXT,
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_InvitationLink" (
    "id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "is_accepted" BOOLEAN NOT NULL DEFAULT false,
    "accepted_at" TIMESTAMP(3),
    "expires_at" TIMESTAMP(3) NOT NULL,
    "created_at" TIMESTAMP(3) NOT NULL,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
);

-- CreateTable
CREATE TABLE "LiteLLM_AuditLog" (
    "id" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "changed_by" TEXT NOT NULL DEFAULT '',
    "changed_by_api_key" TEXT NOT NULL DEFAULT '',
    "action" TEXT NOT NULL,
    "table_name" TEXT NOT NULL,
    "object_id" TEXT NOT NULL,
    "before_value" JSONB,
    "updated_values" JSONB,

    CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");

-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");

-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
@@ -0,0 +1,33 @@
-- CreateTable
CREATE TABLE "LiteLLM_DailyUserSpend" (
    "id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "date" TEXT NOT NULL,
    "api_key" TEXT NOT NULL,
    "model" TEXT NOT NULL,
    "model_group" TEXT,
    "custom_llm_provider" TEXT,
    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");
@@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;
@@ -0,0 +1,14 @@
-- CreateEnum
CREATE TYPE "JobStatus" AS ENUM ('ACTIVE', 'INACTIVE');

-- CreateTable
CREATE TABLE "LiteLLM_CronJob" (
    "cronjob_id" TEXT NOT NULL,
    "pod_id" TEXT NOT NULL,
    "status" "JobStatus" NOT NULL DEFAULT 'INACTIVE',
    "last_updated" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "ttl" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "LiteLLM_CronJob_pkey" PRIMARY KEY ("cronjob_id")
);
@@ -0,0 +1 @@
provider = "postgresql"

80  litellm-proxy-extras/litellm_proxy_extras/utils.py  Normal file
@@ -0,0 +1,80 @@
import os
import random
import subprocess
import time
from typing import Optional

from litellm_proxy_extras._logging import logger


def str_to_bool(value: Optional[str]) -> bool:
    if value is None:
        return False
    return value.lower() in ("true", "1", "t", "y", "yes")


class ProxyExtrasDBManager:
    @staticmethod
    def setup_database(schema_path: str, use_migrate: bool = False) -> bool:
        """
        Set up the database using either prisma migrate or prisma db push
        Uses migrations from litellm-proxy-extras package

        Args:
            schema_path (str): Path to the Prisma schema file
            use_migrate (bool): Whether to use prisma migrate instead of db push

        Returns:
            bool: True if setup was successful, False otherwise
        """
        use_migrate = str_to_bool(os.getenv("USE_PRISMA_MIGRATE")) or use_migrate
        for attempt in range(4):
            original_dir = os.getcwd()
            schema_dir = os.path.dirname(schema_path)
            os.chdir(schema_dir)

            try:
                if use_migrate:
                    logger.info("Running prisma migrate deploy")
                    try:
                        # Set migrations directory for Prisma
                        subprocess.run(
                            ["prisma", "migrate", "deploy"],
                            timeout=60,
                            check=True,
                            capture_output=True,
                            text=True,
                        )
                        logger.info("prisma migrate deploy completed")
                        return True
                    except subprocess.CalledProcessError as e:
                        logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
                        if (
                            "P3005" in e.stderr
                            and "database schema is not empty" in e.stderr
                        ):
                            logger.info("Error: Database schema is not empty")
                            return False
                else:
                    # Use prisma db push with increased timeout
                    subprocess.run(
                        ["prisma", "db", "push", "--accept-data-loss"],
                        timeout=60,
                        check=True,
                    )
                    return True
            except subprocess.TimeoutExpired:
                logger.info(f"Attempt {attempt + 1} timed out")
                time.sleep(random.randrange(5, 15))
            except subprocess.CalledProcessError as e:
                attempts_left = 3 - attempt
                retry_msg = (
                    f" Retrying... ({attempts_left} attempts left)"
                    if attempts_left > 0
                    else ""
                )
                logger.info(f"The process failed to execute. Details: {e}.{retry_msg}")
                time.sleep(random.randrange(5, 15))
            finally:
                os.chdir(original_dir)
        return False
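Note: a minimal usage sketch of ProxyExtrasDBManager.setup_database() as a caller might invoke it at proxy startup. The schema path is a placeholder, not taken from this diff; only the class, method, and USE_PRISMA_MIGRATE env var come from the file above.

# Hypothetical caller (illustrative only)
import os

from litellm_proxy_extras.utils import ProxyExtrasDBManager

os.environ["USE_PRISMA_MIGRATE"] = "True"  # opt in to `prisma migrate deploy`

success = ProxyExtrasDBManager.setup_database(
    schema_path="/path/to/schema.prisma",  # placeholder path
    use_migrate=False,  # overridden to True by USE_PRISMA_MIGRATE above
)
if not success:
    raise RuntimeError("Prisma setup failed after retries")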
30
litellm-proxy-extras/pyproject.toml
Normal file
30
litellm-proxy-extras/pyproject.toml
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "litellm-proxy-extras"
|
||||||
|
version = "0.1.1"
|
||||||
|
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||||
|
authors = ["BerriAI"]
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.poetry.urls]
|
||||||
|
homepage = "https://litellm.ai"
|
||||||
|
Homepage = "https://litellm.ai"
|
||||||
|
repository = "https://github.com/BerriAI/litellm"
|
||||||
|
Repository = "https://github.com/BerriAI/litellm"
|
||||||
|
documentation = "https://docs.litellm.ai"
|
||||||
|
Documentation = "https://docs.litellm.ai"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = ">=3.8.1,<4.0, !=3.9.7"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
[tool.commitizen]
|
||||||
|
version = "0.1.1"
|
||||||
|
version_files = [
|
||||||
|
"pyproject.toml:version",
|
||||||
|
"../requirements.txt:litellm-proxy-extras==",
|
||||||
|
"../pyproject.toml:litellm-proxy-extras = {version = \""
|
||||||
|
]
|
0
litellm-proxy-extras/tests/__init__.py
Normal file
0
litellm-proxy-extras/tests/__init__.py
Normal file
|
@@ -2,7 +2,7 @@
 import warnings
 
 warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
-### INIT VARIABLES ##########
+### INIT VARIABLES ###########
 import threading
 import os
 from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
@@ -122,19 +122,19 @@ langsmith_batch_size: Optional[int] = None
 prometheus_initialize_budget_metrics: Optional[bool] = False
 argilla_batch_size: Optional[int] = None
 datadog_use_v1: Optional[bool] = False  # if you want to use v1 datadog logged payload
-gcs_pub_sub_use_v1: Optional[bool] = (
-    False  # if you want to use v1 gcs pubsub logged payload
-)
+gcs_pub_sub_use_v1: Optional[
+    bool
+] = False  # if you want to use v1 gcs pubsub logged payload
 argilla_transformation_object: Optional[Dict[str, Any]] = None
-_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
-    []
-)  # internal variable - async custom callbacks are routed here.
-_async_success_callback: List[Union[str, Callable, CustomLogger]] = (
-    []
-)  # internal variable - async custom callbacks are routed here.
-_async_failure_callback: List[Union[str, Callable, CustomLogger]] = (
-    []
-)  # internal variable - async custom callbacks are routed here.
+_async_input_callback: List[
+    Union[str, Callable, CustomLogger]
+] = []  # internal variable - async custom callbacks are routed here.
+_async_success_callback: List[
+    Union[str, Callable, CustomLogger]
+] = []  # internal variable - async custom callbacks are routed here.
+_async_failure_callback: List[
+    Union[str, Callable, CustomLogger]
+] = []  # internal variable - async custom callbacks are routed here.
 pre_call_rules: List[Callable] = []
 post_call_rules: List[Callable] = []
 turn_off_message_logging: Optional[bool] = False
@@ -142,18 +142,18 @@ log_raw_request_response: bool = False
 redact_messages_in_exceptions: Optional[bool] = False
 redact_user_api_key_info: Optional[bool] = False
 filter_invalid_headers: Optional[bool] = False
-add_user_information_to_llm_headers: Optional[bool] = (
-    None  # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
-)
+add_user_information_to_llm_headers: Optional[
+    bool
+] = None  # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
 store_audit_logs = False  # Enterprise feature, allow users to see audit logs
 ### end of callbacks #############
 
-email: Optional[str] = (
-    None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
-)
-token: Optional[str] = (
-    None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
-)
+email: Optional[
+    str
+] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
+token: Optional[
+    str
+] = None  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 telemetry = True
 max_tokens = 256  # OpenAI Defaults
 drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
@@ -229,24 +229,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
 enable_caching_on_provider_specific_optional_params: bool = (
     False  # feature-flag for caching on optional params - e.g. 'top_k'
 )
-caching: bool = (
-    False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
-)
-caching_with_models: bool = (
-    False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
-)
-cache: Optional[Cache] = (
-    None  # cache object <- use this - https://docs.litellm.ai/docs/caching
-)
+caching: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
+caching_with_models: bool = False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
+cache: Optional[
+    Cache
+] = None  # cache object <- use this - https://docs.litellm.ai/docs/caching
 default_in_memory_ttl: Optional[float] = None
 default_redis_ttl: Optional[float] = None
 default_redis_batch_cache_expiry: Optional[float] = None
 model_alias_map: Dict[str, str] = {}
 model_group_alias_map: Dict[str, str] = {}
 max_budget: float = 0.0  # set the max budget across all providers
-budget_duration: Optional[str] = (
-    None  # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
-)
+budget_duration: Optional[
+    str
+] = None  # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
 default_soft_budget: float = (
     50.0  # by default all litellm proxy keys have a soft budget of 50.0
 )
@@ -255,15 +251,11 @@ forward_traceparent_to_llm_provider: bool = False
 
 _current_cost = 0.0  # private variable, used if max budget is set
 error_logs: Dict = {}
-add_function_to_prompt: bool = (
-    False  # if function calling not supported by api, append function call details to system prompt
-)
+add_function_to_prompt: bool = False  # if function calling not supported by api, append function call details to system prompt
 client_session: Optional[httpx.Client] = None
 aclient_session: Optional[httpx.AsyncClient] = None
 model_fallbacks: Optional[List] = None  # Deprecated for 'litellm.fallbacks'
-model_cost_map_url: str = (
-    "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
-)
+model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
 suppress_debug_info = False
 dynamodb_table_name: Optional[str] = None
 s3_callback_params: Optional[Dict] = None
@@ -285,9 +277,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
 custom_prometheus_metadata_labels: List[str] = []
 #### REQUEST PRIORITIZATION ####
 priority_reservation: Optional[Dict[str, float]] = None
-force_ipv4: bool = (
-    False  # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
-)
+force_ipv4: bool = False  # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
 module_level_aclient = AsyncHTTPHandler(
     timeout=request_timeout, client_alias="module level aclient"
 )
@@ -301,13 +291,13 @@ fallbacks: Optional[List] = None
 context_window_fallbacks: Optional[List] = None
 content_policy_fallbacks: Optional[List] = None
 allowed_fails: int = 3
-num_retries_per_request: Optional[int] = (
-    None  # for the request overall (incl. fallbacks + model retries)
-)
+num_retries_per_request: Optional[
+    int
+] = None  # for the request overall (incl. fallbacks + model retries)
 ####### SECRET MANAGERS #####################
-secret_manager_client: Optional[Any] = (
-    None  # list of instantiated key management clients - e.g. azure kv, infisical, etc.
-)
+secret_manager_client: Optional[
+    Any
+] = None  # list of instantiated key management clients - e.g. azure kv, infisical, etc.
 _google_kms_resource_name: Optional[str] = None
 _key_management_system: Optional[KeyManagementSystem] = None
 _key_management_settings: KeyManagementSettings = KeyManagementSettings()
@@ -813,6 +803,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
 from .llms.maritalk import MaritalkConfig
 from .llms.openrouter.chat.transformation import OpenrouterConfig
 from .llms.anthropic.chat.transformation import AnthropicConfig
+from .llms.anthropic.common_utils import AnthropicModelInfo
 from .llms.groq.stt.transformation import GroqSTTConfig
 from .llms.anthropic.completion.transformation import AnthropicTextConfig
 from .llms.triton.completion.transformation import TritonConfig
@@ -848,6 +839,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
     VertexGeminiConfig,
     VertexGeminiConfig as VertexAIConfig,
 )
+from .llms.gemini.common_utils import GeminiModelInfo
 from .llms.gemini.chat.transformation import (
     GoogleAIStudioGeminiConfig,
     GoogleAIStudioGeminiConfig as GeminiConfig,  # aliased to maintain backwards compatibility
@@ -950,6 +942,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
 from .llms.openai.chat.gpt_transformation import (
     OpenAIGPTConfig,
 )
+from .llms.openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
+from .llms.openai.transcriptions.gpt_transformation import (
+    OpenAIGPTAudioTranscriptionConfig,
+)
 
 openAIGPTConfig = OpenAIGPTConfig()
 from .llms.openai.chat.gpt_audio_transformation import (
@@ -978,6 +976,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
 from .llms.friendliai.chat.transformation import FriendliaiChatConfig
 from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
 from .llms.xai.chat.transformation import XAIChatConfig
+from .llms.xai.common_utils import XAIModelInfo
 from .llms.volcengine import VolcEngineConfig
 from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
 from .llms.azure.azure import (
@@ -1047,10 +1046,10 @@ from .types.llms.custom_llm import CustomLLMItem
 from .types.utils import GenericStreamingChunk
 
 custom_provider_map: List[CustomLLMItem] = []
-_custom_providers: List[str] = (
-    []
-)  # internal helper util, used to track names of custom providers
-disable_hf_tokenizer_download: Optional[bool] = (
-    None  # disable huggingface tokenizer download. Defaults to openai clk100
-)
+_custom_providers: List[
+    str
+] = []  # internal helper util, used to track names of custom providers
+disable_hf_tokenizer_download: Optional[
+    bool
+] = None  # disable huggingface tokenizer download. Defaults to openai clk100
 global_disable_no_log_param: bool = False
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import sys
 from datetime import datetime
 from logging import Formatter
 
@@ -40,9 +41,56 @@ class JsonFormatter(Formatter):
         return json.dumps(json_record)
 
 
+# Function to set up exception handlers for JSON logging
+def _setup_json_exception_handlers(formatter):
+    # Create a handler with JSON formatting for exceptions
+    error_handler = logging.StreamHandler()
+    error_handler.setFormatter(formatter)
+
+    # Setup excepthook for uncaught exceptions
+    def json_excepthook(exc_type, exc_value, exc_traceback):
+        record = logging.LogRecord(
+            name="LiteLLM",
+            level=logging.ERROR,
+            pathname="",
+            lineno=0,
+            msg=str(exc_value),
+            args=(),
+            exc_info=(exc_type, exc_value, exc_traceback),
+        )
+        error_handler.handle(record)
+
+    sys.excepthook = json_excepthook
+
+    # Configure asyncio exception handler if possible
+    try:
+        import asyncio
+
+        def async_json_exception_handler(loop, context):
+            exception = context.get("exception")
+            if exception:
+                record = logging.LogRecord(
+                    name="LiteLLM",
+                    level=logging.ERROR,
+                    pathname="",
+                    lineno=0,
+                    msg=str(exception),
+                    args=(),
+                    exc_info=None,
+                )
+                error_handler.handle(record)
+            else:
+                loop.default_exception_handler(context)
+
+        asyncio.get_event_loop().set_exception_handler(async_json_exception_handler)
+    except Exception:
+        pass
+
+
 # Create a formatter and set it for the handler
 if json_logs:
     handler.setFormatter(JsonFormatter())
+    _setup_json_exception_handlers(JsonFormatter())
 else:
     formatter = logging.Formatter(
         "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
@@ -65,18 +113,24 @@ def _turn_on_json():
     handler = logging.StreamHandler()
     handler.setFormatter(JsonFormatter())
 
-    # Define a list of the loggers to update
-    loggers = [verbose_router_logger, verbose_proxy_logger, verbose_logger]
+    # Define all loggers to update, including root logger
+    loggers = [logging.getLogger()] + [
+        verbose_router_logger,
+        verbose_proxy_logger,
+        verbose_logger,
+    ]
+
     # Iterate through each logger and update its handlers
     for logger in loggers:
         # Remove all existing handlers
         for h in logger.handlers[:]:
             logger.removeHandler(h)
 
         # Add the new handler
         logger.addHandler(handler)
 
+    # Set up exception handlers
+    _setup_json_exception_handlers(JsonFormatter())
+
 
 def _turn_on_debug():
     verbose_logger.setLevel(level=logging.DEBUG)  # set package log to debug
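Note: a rough sketch of how the change above behaves once JSON logging is switched on. It only uses `_turn_on_json()` as shown in this diff; the exact shape of the emitted JSON record is not specified here.

# Hypothetical demo (illustrative only)
from litellm._logging import _turn_on_json

_turn_on_json()  # swaps handlers to JsonFormatter and installs sys.excepthook

# An uncaught exception is now routed through the JSON-formatting handler
# instead of printing a bare traceback.
raise ValueError("boom")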
@@ -202,6 +202,7 @@ def init_redis_cluster(redis_kwargs) -> redis.RedisCluster:
 
 def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
     sentinel_nodes = redis_kwargs.get("sentinel_nodes")
+    sentinel_password = redis_kwargs.get("sentinel_password")
     service_name = redis_kwargs.get("service_name")
 
     if not sentinel_nodes or not service_name:
@@ -212,7 +213,11 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
     verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")
 
     # Set up the Sentinel client
-    sentinel = redis.Sentinel(sentinel_nodes, socket_timeout=0.1)
+    sentinel = redis.Sentinel(
+        sentinel_nodes,
+        socket_timeout=0.1,
+        password=sentinel_password,
+    )
 
     # Return the master instance for the given service
 
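Note: with the change above, a password-protected Sentinel deployment can pass its password through the same kwargs dict the function already reads. A rough sketch of the expected shape (hosts, ports, and password are placeholders):

# Hypothetical kwargs for _init_redis_sentinel(); values are placeholders.
redis_kwargs = {
    "sentinel_nodes": [("sentinel-1.internal", 26379), ("sentinel-2.internal", 26379)],
    "service_name": "mymaster",
    "sentinel_password": "example-password",  # newly honored by this change
}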
@@ -15,7 +15,7 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
 
-    Span = _Span
+    Span = Union[_Span, Any]
     OTELClass = OpenTelemetry
 else:
     Span = Any
@@ -153,7 +153,6 @@ def create_batch(
     )
     api_base: Optional[str] = None
     if custom_llm_provider == "openai":
-
         # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
         api_base = (
             optional_params.api_base
@@ -358,7 +357,6 @@ def retrieve_batch(
     _is_async = kwargs.pop("aretrieve_batch", False) is True
     api_base: Optional[str] = None
     if custom_llm_provider == "openai":
-
        # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
        api_base = (
            optional_params.api_base
@@ -9,12 +9,12 @@ Has 4 methods:
 """
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
 
-    Span = _Span
+    Span = Union[_Span, Any]
 else:
     Span = Any
 
@@ -66,9 +66,7 @@ class CachingHandlerResponse(BaseModel):
 
     cached_result: Optional[Any] = None
     final_embedding_cached_response: Optional[EmbeddingResponse] = None
-    embedding_all_elements_cache_hit: bool = (
-        False  # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
-    )
+    embedding_all_elements_cache_hit: bool = False  # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
 
 
 class LLMCachingHandler:
@@ -738,7 +736,6 @@ class LLMCachingHandler:
         if self._should_store_result_in_cache(
             original_function=self.original_function, kwargs=new_kwargs
         ):
-
             litellm.cache.add_cache(result, **new_kwargs)
 
         return
@@ -865,9 +862,9 @@ class LLMCachingHandler:
         }
 
         if litellm.cache is not None:
-            litellm_params["preset_cache_key"] = (
-                litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
-            )
+            litellm_params[
+                "preset_cache_key"
+            ] = litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
         else:
             litellm_params["preset_cache_key"] = None
 
@@ -1,12 +1,12 @@
 import json
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 from .base_cache import BaseCache
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
 
-    Span = _Span
+    Span = Union[_Span, Any]
 else:
     Span = Any
 
@@ -12,7 +12,7 @@ import asyncio
 import time
 import traceback
 from concurrent.futures import ThreadPoolExecutor
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import litellm
 from litellm._logging import print_verbose, verbose_logger
@@ -24,7 +24,7 @@ from .redis_cache import RedisCache
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
 
-    Span = _Span
+    Span = Union[_Span, Any]
 else:
     Span = Any
 
@@ -8,7 +8,6 @@ from .in_memory_cache import InMemoryCache
 
-
 class LLMClientCache(InMemoryCache):
 
     def update_cache_key_with_event_loop(self, key):
         """
         Add the event loop to the cache key, to prevent event loop closed errors.
@@ -34,7 +34,7 @@ if TYPE_CHECKING:
     cluster_pipeline = ClusterPipeline
     async_redis_client = Redis
     async_redis_cluster_client = RedisCluster
-    Span = _Span
+    Span = Union[_Span, Any]
 else:
     pipeline = Any
     cluster_pipeline = Any
@@ -57,7 +57,6 @@ class RedisCache(BaseCache):
         socket_timeout: Optional[float] = 5.0,  # default 5 second timeout
         **kwargs,
     ):
-
         from litellm._service_logger import ServiceLogging
 
         from .._redis import get_redis_client, get_redis_connection_pool
@@ -1045,3 +1044,109 @@ class RedisCache(BaseCache):
         except Exception as e:
             verbose_logger.debug(f"Redis TTL Error: {e}")
             return None
+
+    async def async_rpush(
+        self,
+        key: str,
+        values: List[Any],
+        parent_otel_span: Optional[Span] = None,
+        **kwargs,
+    ) -> int:
+        """
+        Append one or multiple values to a list stored at key
+
+        Args:
+            key: The Redis key of the list
+            values: One or more values to append to the list
+            parent_otel_span: Optional parent OpenTelemetry span
+
+        Returns:
+            int: The length of the list after the push operation
+        """
+        _redis_client: Any = self.init_async_client()
+        start_time = time.time()
+        try:
+            response = await _redis_client.rpush(key, *values)
+            ## LOGGING ##
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_success_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    call_type="async_rpush",
+                )
+            )
+            return response
+        except Exception as e:
+            # NON blocking - notify users Redis is throwing an exception
+            ## LOGGING ##
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_failure_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    error=e,
+                    call_type="async_rpush",
+                )
+            )
+            verbose_logger.error(
+                f"LiteLLM Redis Cache RPUSH: - Got exception from REDIS : {str(e)}"
+            )
+            raise e
+
+    async def async_lpop(
+        self,
+        key: str,
+        count: Optional[int] = None,
+        parent_otel_span: Optional[Span] = None,
+        **kwargs,
+    ) -> Union[Any, List[Any]]:
+        _redis_client: Any = self.init_async_client()
+        start_time = time.time()
+        print_verbose(f"LPOP from Redis list: key: {key}, count: {count}")
+        try:
+            result = await _redis_client.lpop(key, count)
+            ## LOGGING ##
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_success_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    call_type="async_lpop",
+                )
+            )
+
+            # Handle result parsing if needed
+            if isinstance(result, bytes):
+                try:
+                    return result.decode("utf-8")
+                except Exception:
+                    return result
+            elif isinstance(result, list) and all(
+                isinstance(item, bytes) for item in result
+            ):
+                try:
+                    return [item.decode("utf-8") for item in result]
+                except Exception:
+                    return result
+            return result
+        except Exception as e:
+            # NON blocking - notify users Redis is throwing an exception
+            ## LOGGING ##
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_failure_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    error=e,
+                    call_type="async_lpop",
+                )
+            )
+            verbose_logger.error(
+                f"LiteLLM Redis Cache LPOP: - Got exception from REDIS : {str(e)}"
+            )
+            raise e
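Note: a short usage sketch for the new list helpers on RedisCache added above. It assumes a configured RedisCache instance and a running event loop; the connection details and key names are placeholders.

# Hypothetical usage (illustrative only)
import asyncio

from litellm.caching.redis_cache import RedisCache


async def main():
    cache = RedisCache(host="localhost", port=6379)  # placeholder connection details
    await cache.async_rpush(key="my_queue", values=["job-1", "job-2"])
    item = await cache.async_lpop(key="my_queue")  # bytes are decoded to str where possible
    print(item)


asyncio.run(main())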
@@ -5,7 +5,7 @@ Key differences:
 - RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created
 """
 
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 from litellm.caching.redis_cache import RedisCache
 
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
 
     pipeline = Pipeline
     async_redis_client = Redis
-    Span = _Span
+    Span = Union[_Span, Any]
 else:
     pipeline = Any
     async_redis_client = Any
@ -13,11 +13,15 @@ import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple, cast
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import print_verbose
|
from litellm._logging import print_verbose
|
||||||
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages
|
from litellm.litellm_core_utils.prompt_templates.common_utils import (
|
||||||
|
get_str_from_messages,
|
||||||
|
)
|
||||||
|
from litellm.types.utils import EmbeddingResponse
|
||||||
|
|
||||||
from .base_cache import BaseCache
|
from .base_cache import BaseCache
|
||||||
|
|
||||||
|
|
||||||
|
@ -87,14 +91,16 @@ class RedisSemanticCache(BaseCache):
|
||||||
if redis_url is None:
|
if redis_url is None:
|
||||||
try:
|
try:
|
||||||
# Attempt to use provided parameters or fallback to environment variables
|
# Attempt to use provided parameters or fallback to environment variables
|
||||||
host = host or os.environ['REDIS_HOST']
|
host = host or os.environ["REDIS_HOST"]
|
||||||
port = port or os.environ['REDIS_PORT']
|
port = port or os.environ["REDIS_PORT"]
|
||||||
password = password or os.environ['REDIS_PASSWORD']
|
 password = password or os.environ["REDIS_PASSWORD"]
 except KeyError as e:
 # Raise a more informative exception if any of the required keys are missing
 missing_var = e.args[0]
-raise ValueError(f"Missing required Redis configuration: {missing_var}. "
-f"Provide {missing_var} or redis_url.") from e
+raise ValueError(
+f"Missing required Redis configuration: {missing_var}. "
+f"Provide {missing_var} or redis_url."
+) from e

 redis_url = f"redis://:{password}@{host}:{port}"

@@ -137,10 +143,13 @@ class RedisSemanticCache(BaseCache):
 List[float]: The embedding vector
 """
 # Create an embedding from prompt
-embedding_response = litellm.embedding(
+embedding_response = cast(
+EmbeddingResponse,
+litellm.embedding(
 model=self.embedding_model,
 input=prompt,
 cache={"no-store": True, "no-cache": True},
+),
 )
 embedding = embedding_response["data"][0]["embedding"]
 return embedding
@@ -186,6 +195,7 @@ class RedisSemanticCache(BaseCache):
 """
 print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")

+value_str: Optional[str] = None
 try:
 # Extract the prompt from messages
 messages = kwargs.get("messages", [])
@@ -203,7 +213,9 @@ class RedisSemanticCache(BaseCache):
 else:
 self.llmcache.store(prompt, value_str)
 except Exception as e:
-print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}")
+print_verbose(
+f"Error setting {value_str or value} in the Redis semantic cache: {str(e)}"
+)

 def get_cache(self, key: str, **kwargs) -> Any:
 """
@@ -336,13 +348,13 @@ class RedisSemanticCache(BaseCache):
 prompt,
 value_str,
 vector=prompt_embedding, # Pass through custom embedding
-ttl=ttl
+ttl=ttl,
 )
 else:
 await self.llmcache.astore(
 prompt,
 value_str,
-vector=prompt_embedding # Pass through custom embedding
+vector=prompt_embedding, # Pass through custom embedding
 )
 except Exception as e:
 print_verbose(f"Error in async_set_cache: {str(e)}")
@@ -374,14 +386,13 @@ class RedisSemanticCache(BaseCache):
 prompt_embedding = await self._get_async_embedding(prompt, **kwargs)

 # Check the cache for semantically similar prompts
-results = await self.llmcache.acheck(
-prompt=prompt,
-vector=prompt_embedding
-)
+results = await self.llmcache.acheck(prompt=prompt, vector=prompt_embedding)

 # handle results / cache hit
 if not results:
-kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 # TODO why here but not above??
+kwargs.setdefault("metadata", {})[
+"semantic-similarity"
+] = 0.0 # TODO why here but not above??
 return None

 cache_hit = results[0]
@@ -420,7 +431,9 @@ class RedisSemanticCache(BaseCache):
 aindex = await self.llmcache._get_async_index()
 return await aindex.info()

-async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None:
+async def async_set_cache_pipeline(
+self, cache_list: List[Tuple[str, Any]], **kwargs
+) -> None:
 """
 Asynchronously store multiple values in the semantic cache.

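The hunk above wraps litellm.embedding() in typing.cast(EmbeddingResponse, ...) so the response can be indexed with a known shape. A minimal sketch of the same pattern, assuming the embedding model name below for illustration only:

from typing import List, cast

import litellm


def get_prompt_embedding(prompt: str) -> List[float]:
    # cast() only informs the type checker; litellm.embedding() already returns
    # an EmbeddingResponse at runtime, so no conversion happens here.
    embedding_response = cast(
        litellm.EmbeddingResponse,
        litellm.embedding(
            model="text-embedding-ada-002",  # assumed model name, not from this commit
            input=prompt,
            cache={"no-store": True, "no-cache": True},  # same cache-bypass flags as the hunk above
        ),
    )
    return embedding_response["data"][0]["embedding"]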
@@ -123,7 +123,7 @@ class S3Cache(BaseCache):
 ) # Convert string to dictionary
 except Exception:
 cached_response = ast.literal_eval(cached_response)
-if type(cached_response) is not dict:
+if not isinstance(cached_response, dict):
 cached_response = dict(cached_response)
 verbose_logger.debug(
 f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"

@@ -4,9 +4,11 @@ ROUTER_MAX_FALLBACKS = 5
 DEFAULT_BATCH_SIZE = 512
 DEFAULT_FLUSH_INTERVAL_SECONDS = 5
 DEFAULT_MAX_RETRIES = 2
+DEFAULT_MAX_RECURSE_DEPTH = 10
 DEFAULT_FAILURE_THRESHOLD_PERCENT = (
 0.5 # default cooldown a deployment if 50% of requests fail in a given minute
 )
+DEFAULT_MAX_TOKENS = 4096
 DEFAULT_REDIS_SYNC_INTERVAL = 1
 DEFAULT_COOLDOWN_TIME_SECONDS = 5
 DEFAULT_REPLICATE_POLLING_RETRIES = 5
@@ -16,6 +18,8 @@ DEFAULT_IMAGE_WIDTH = 300
 DEFAULT_IMAGE_HEIGHT = 300
 MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
+REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
+MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
 #### RELIABILITY ####
 REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
 #### Networking settings ####
@@ -414,6 +418,7 @@ RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" # default tool name used when conv

 ########################### Logging Callback Constants ###########################
 AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
+MCP_TOOL_NAME_PREFIX = "mcp_tool"

 ########################### LiteLLM Proxy Specific Constants ###########################
 ########################################################################################
@@ -441,3 +446,7 @@ HEALTH_CHECK_TIMEOUT_SECONDS = 60 # 60 seconds

 UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
 LITELLM_PROXY_ADMIN_NAME = "default_user_id"

+########################### DB CRON JOB NAMES ###########################
+DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
+DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute

@@ -2,7 +2,7 @@
 ## File for 'response_cost' calculation in Logging
 import time
 from functools import lru_cache
-from typing import Any, List, Literal, Optional, Tuple, Union
+from typing import Any, List, Literal, Optional, Tuple, Union, cast

 from pydantic import BaseModel

@@ -275,15 +275,13 @@ def cost_per_token( # noqa: PLR0915
 custom_llm_provider=custom_llm_provider,
 prompt_characters=prompt_characters,
 completion_characters=completion_characters,
-prompt_tokens=prompt_tokens,
-completion_tokens=completion_tokens,
+usage=usage_block,
 )
 elif cost_router == "cost_per_token":
 return google_cost_per_token(
 model=model_without_prefix,
 custom_llm_provider=custom_llm_provider,
-prompt_tokens=prompt_tokens,
-completion_tokens=completion_tokens,
+usage=usage_block,
 )
 elif custom_llm_provider == "anthropic":
 return anthropic_cost_per_token(model=model, usage=usage_block)
@@ -464,13 +462,36 @@ def _model_contains_known_llm_provider(model: str) -> bool:
 def _get_usage_object(
 completion_response: Any,
 ) -> Optional[Usage]:
-usage_obj: Optional[Usage] = None
-if completion_response is not None and isinstance(
-completion_response, ModelResponse
-):
-usage_obj = completion_response.get("usage")
+usage_obj = cast(
+Union[Usage, ResponseAPIUsage, dict, BaseModel],
+(
+completion_response.get("usage")
+if isinstance(completion_response, dict)
+else getattr(completion_response, "get", lambda x: None)("usage")
+),
+)

+if usage_obj is None:
+return None
+if isinstance(usage_obj, Usage):
 return usage_obj
+elif (
+usage_obj is not None
+and (isinstance(usage_obj, dict) or isinstance(usage_obj, ResponseAPIUsage))
+and ResponseAPILoggingUtils._is_response_api_usage(usage_obj)
+):
+return ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+usage_obj
+)
+elif isinstance(usage_obj, dict):
+return Usage(**usage_obj)
+elif isinstance(usage_obj, BaseModel):
+return Usage(**usage_obj.model_dump())
+else:
+verbose_logger.debug(
+f"Unknown usage object type: {type(usage_obj)}, usage_obj: {usage_obj}"
+)
+return None


 def _is_known_usage_objects(usage_obj):
@@ -559,7 +580,6 @@ def completion_cost( # noqa: PLR0915
 - For un-mapped Replicate models, the cost is calculated based on the total time used for the request.
 """
 try:

 call_type = _infer_call_type(call_type, completion_response) or "completion"

 if (
@@ -664,6 +684,7 @@ def completion_cost( # noqa: PLR0915
 elif len(prompt) > 0:
 prompt_tokens = token_counter(model=model, text=prompt)
 completion_tokens = token_counter(model=model, text=completion)

 if model is None:
 raise ValueError(
 f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
@@ -828,11 +849,14 @@ def get_response_cost_from_hidden_params(
 _hidden_params_dict = hidden_params

 additional_headers = _hidden_params_dict.get("additional_headers", {})
-if additional_headers and "x-litellm-response-cost" in additional_headers:
-response_cost = additional_headers["x-litellm-response-cost"]
+if (
+additional_headers
+and "llm_provider-x-litellm-response-cost" in additional_headers
+):
+response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
 if response_cost is None:
 return None
-return float(additional_headers["x-litellm-response-cost"])
+return float(additional_headers["llm_provider-x-litellm-response-cost"])
 return None

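The cost_calculator hunk above switches the header lookup from "x-litellm-response-cost" to "llm_provider-x-litellm-response-cost". A small standalone sketch of that lookup, assuming only the hidden_params shape visible in the hunk:

from typing import Optional


def response_cost_from_hidden_params(hidden_params: dict) -> Optional[float]:
    # additional_headers carries provider-prefixed response headers; the cost key
    # is now "llm_provider-x-litellm-response-cost" rather than "x-litellm-response-cost".
    additional_headers = hidden_params.get("additional_headers", {}) or {}
    response_cost = additional_headers.get("llm_provider-x-litellm-response-cost")
    if response_cost is None:
        return None
    return float(response_cost)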
@@ -1,5 +1,5 @@
 import json
-from typing import List, Literal, Union
+from typing import Dict, List, Literal, Union

 from mcp import ClientSession
 from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
@@ -76,8 +76,8 @@ def _get_function_arguments(function: FunctionDefinition) -> dict:
 return arguments if isinstance(arguments, dict) else {}


-def _transform_openai_tool_call_to_mcp_tool_call_request(
-openai_tool: ChatCompletionMessageToolCall,
+def transform_openai_tool_call_request_to_mcp_tool_call_request(
+openai_tool: Union[ChatCompletionMessageToolCall, Dict],
 ) -> MCPCallToolRequestParams:
 """Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams."""
 function = openai_tool["function"]
@@ -100,9 +100,11 @@ async def call_openai_tool(
 Returns:
 The result of the MCP tool call.
 """
-mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request(
+mcp_tool_call_request_params = (
+transform_openai_tool_call_request_to_mcp_tool_call_request(
 openai_tool=openai_tool,
 )
+)
 return await call_mcp_tool(
 session=session,
 call_tool_request_params=mcp_tool_call_request_params,

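The rename above also widens the accepted input to Union[ChatCompletionMessageToolCall, Dict]. A hypothetical usage sketch, assuming the helper is imported from the MCP transform module changed above and that the tool-call payload below is representative:

tool_call = {
    "id": "call_1",  # assumed example payload, not taken from this commit
    "type": "function",
    "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
}

# Plain dicts are now accepted alongside ChatCompletionMessageToolCall objects.
mcp_request = transform_openai_tool_call_request_to_mcp_tool_call_request(
    openai_tool=tool_call,
)
print(mcp_request.name, mcp_request.arguments)  # CallToolRequestParams fields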
@@ -138,7 +138,6 @@ def create_fine_tuning_job(

 # OpenAI
 if custom_llm_provider == "openai":

 # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
 api_base = (
 optional_params.api_base
@@ -360,7 +359,6 @@ def cancel_fine_tuning_job(

 # OpenAI
 if custom_llm_provider == "openai":

 # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
 api_base = (
 optional_params.api_base
@@ -522,7 +520,6 @@ def list_fine_tuning_jobs(

 # OpenAI
 if custom_llm_provider == "openai":

 # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
 api_base = (
 optional_params.api_base

@@ -19,7 +19,6 @@ else:


 def squash_payloads(queue):

 squashed = {}
 if len(queue) == 0:
 return squashed

@@ -195,13 +195,16 @@ class SlackAlerting(CustomBatchLogger):
 if self.alerting is None or self.alert_types is None:
 return

-time_difference_float, model, api_base, messages = (
-self._response_taking_too_long_callback_helper(
+(
+time_difference_float,
+model,
+api_base,
+messages,
+) = self._response_taking_too_long_callback_helper(
 kwargs=kwargs,
 start_time=start_time,
 end_time=end_time,
 )
-)
 if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions:
 messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
 request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
@@ -819,9 +822,9 @@ class SlackAlerting(CustomBatchLogger):
 ### UNIQUE CACHE KEY ###
 cache_key = provider + region_name

-outage_value: Optional[ProviderRegionOutageModel] = (
-await self.internal_usage_cache.async_get_cache(key=cache_key)
-)
+outage_value: Optional[
+ProviderRegionOutageModel
+] = await self.internal_usage_cache.async_get_cache(key=cache_key)

 if (
 getattr(exception, "status_code", None) is None
@@ -1402,9 +1405,9 @@ Model Info:
 self.alert_to_webhook_url is not None
 and alert_type in self.alert_to_webhook_url
 ):
-slack_webhook_url: Optional[Union[str, List[str]]] = (
-self.alert_to_webhook_url[alert_type]
-)
+slack_webhook_url: Optional[
+Union[str, List[str]]
+] = self.alert_to_webhook_url[alert_type]
 elif self.default_webhook_url is not None:
 slack_webhook_url = self.default_webhook_url
 else:
@@ -1768,7 +1771,6 @@ Model Info:
 - Team Created, Updated, Deleted
 """
 try:

 message = f"`{event_name}`\n"

 key_event_dict = key_event.model_dump()

@@ -98,7 +98,6 @@ class ArgillaLogger(CustomBatchLogger):
 argilla_dataset_name: Optional[str],
 argilla_base_url: Optional[str],
 ) -> ArgillaCredentialsObject:

 _credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY")
 if _credentials_api_key is None:
 raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.")

@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union

 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
@@ -7,7 +7,7 @@ from litellm.types.utils import StandardLoggingPayload
 if TYPE_CHECKING:
 from opentelemetry.trace import Span as _Span

-Span = _Span
+Span = Union[_Span, Any]
 else:
 Span = Any


@@ -19,14 +19,13 @@ if TYPE_CHECKING:
 from litellm.types.integrations.arize import Protocol as _Protocol

 Protocol = _Protocol
-Span = _Span
+Span = Union[_Span, Any]
 else:
 Protocol = Any
 Span = Any


 class ArizeLogger(OpenTelemetry):

 def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
 ArizeLogger.set_arize_attributes(span, kwargs, response_obj)
 return

@@ -1,17 +1,20 @@
 import os
-from typing import TYPE_CHECKING, Any
-from litellm.integrations.arize import _utils
+from typing import TYPE_CHECKING, Any, Union
 from litellm._logging import verbose_logger
+from litellm.integrations.arize import _utils
 from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig

 if TYPE_CHECKING:
-from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
-from litellm.types.integrations.arize import Protocol as _Protocol
 from opentelemetry.trace import Span as _Span

+from litellm.types.integrations.arize import Protocol as _Protocol

+from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig

 Protocol = _Protocol
 OpenTelemetryConfig = _OpenTelemetryConfig
-Span = _Span
+Span = Union[_Span, Any]
 else:
 Protocol = Any
 OpenTelemetryConfig = Any
@@ -20,6 +23,7 @@ else:

 ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"


 class ArizePhoenixLogger:
 @staticmethod
 def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
@@ -59,15 +63,14 @@ class ArizePhoenixLogger:
 # a slightly different auth header format than self hosted phoenix
 if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
 if api_key is None:
-raise ValueError("PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used.")
+raise ValueError(
+"PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
+)
 otlp_auth_headers = f"api_key={api_key}"
 elif api_key is not None:
 # api_key/auth is optional for self hosted phoenix
 otlp_auth_headers = f"Authorization=Bearer {api_key}"

 return ArizePhoenixConfig(
-otlp_auth_headers=otlp_auth_headers,
-protocol=protocol,
-endpoint=endpoint
+otlp_auth_headers=otlp_auth_headers, protocol=protocol, endpoint=endpoint
 )


@@ -12,7 +12,10 @@ class AthinaLogger:
 "athina-api-key": self.athina_api_key,
 "Content-Type": "application/json",
 }
-self.athina_logging_url = os.getenv("ATHINA_BASE_URL", "https://log.athina.ai") + "/api/v1/log/inference"
+self.athina_logging_url = (
+os.getenv("ATHINA_BASE_URL", "https://log.athina.ai")
++ "/api/v1/log/inference"
+)
 self.additional_keys = [
 "environment",
 "prompt_slug",

@@ -50,12 +50,12 @@ class AzureBlobStorageLogger(CustomBatchLogger):
 self.azure_storage_file_system: str = _azure_storage_file_system

 # Internal variables used for Token based authentication
-self.azure_auth_token: Optional[str] = (
-None # the Azure AD token to use for Azure Storage API requests
-)
-self.token_expiry: Optional[datetime] = (
-None # the expiry time of the currentAzure AD token
-)
+self.azure_auth_token: Optional[
+str
+] = None # the Azure AD token to use for Azure Storage API requests
+self.token_expiry: Optional[
+datetime
+] = None # the expiry time of the currentAzure AD token

 asyncio.create_task(self.periodic_flush())
 self.flush_lock = asyncio.Lock()
@@ -153,7 +153,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
 3. Flush the data
 """
 try:

 if self.azure_storage_account_key:
 await self.upload_to_azure_data_lake_with_azure_account_key(
 payload=payload

@@ -4,7 +4,7 @@
 import copy
 import os
 from datetime import datetime
-from typing import Optional, Dict
+from typing import Dict, Optional

 import httpx
 from pydantic import BaseModel
@@ -19,7 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.utils import print_verbose

-global_braintrust_http_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback)
+global_braintrust_http_handler = get_async_httpx_client(
+llm_provider=httpxSpecialProvider.LoggingCallback
+)
 global_braintrust_sync_http_handler = HTTPHandler()
 API_BASE = "https://api.braintrustdata.com/v1"

@@ -35,7 +37,9 @@ def get_utc_datetime():


 class BraintrustLogger(CustomLogger):
-def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None) -> None:
+def __init__(
+self, api_key: Optional[str] = None, api_base: Optional[str] = None
+) -> None:
 super().__init__()
 self.validate_environment(api_key=api_key)
 self.api_base = api_base or API_BASE
@@ -45,7 +49,9 @@ class BraintrustLogger(CustomLogger):
 "Authorization": "Bearer " + self.api_key,
 "Content-Type": "application/json",
 }
-self._project_id_cache: Dict[str, str] = {} # Cache mapping project names to IDs
+self._project_id_cache: Dict[
+str, str
+] = {} # Cache mapping project names to IDs

 def validate_environment(self, api_key: Optional[str]):
 """
@@ -71,7 +77,9 @@ class BraintrustLogger(CustomLogger):

 try:
 response = global_braintrust_sync_http_handler.post(
-f"{self.api_base}/project", headers=self.headers, json={"name": project_name}
+f"{self.api_base}/project",
+headers=self.headers,
+json={"name": project_name},
 )
 project_dict = response.json()
 project_id = project_dict["id"]
@@ -89,7 +97,9 @@ class BraintrustLogger(CustomLogger):

 try:
 response = await global_braintrust_http_handler.post(
-f"{self.api_base}/project/register", headers=self.headers, json={"name": project_name}
+f"{self.api_base}/project/register",
+headers=self.headers,
+json={"name": project_name},
 )
 project_dict = response.json()
 project_id = project_dict["id"]
@@ -116,15 +126,21 @@ class BraintrustLogger(CustomLogger):
 if metadata is None:
 metadata = {}

-proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+proxy_headers = (
+litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+)

 for metadata_param_key in proxy_headers:
 if metadata_param_key.startswith("braintrust"):
 trace_param_key = metadata_param_key.replace("braintrust", "", 1)
 if trace_param_key in metadata:
-verbose_logger.warning(f"Overwriting Braintrust `{trace_param_key}` from request header")
+verbose_logger.warning(
+f"Overwriting Braintrust `{trace_param_key}` from request header"
+)
 else:
-verbose_logger.debug(f"Found Braintrust `{trace_param_key}` in request header")
+verbose_logger.debug(
+f"Found Braintrust `{trace_param_key}` in request header"
+)
 metadata[trace_param_key] = proxy_headers.get(metadata_param_key)

 return metadata
@@ -157,24 +173,35 @@ class BraintrustLogger(CustomLogger):
 output = None
 choices = []
 if response_obj is not None and (
-kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
+kwargs.get("call_type", None) == "embedding"
+or isinstance(response_obj, litellm.EmbeddingResponse)
 ):
 output = None
-elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.ModelResponse
+):
 output = response_obj["choices"][0]["message"].json()
 choices = response_obj["choices"]
-elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.TextCompletionResponse
+):
 output = response_obj.choices[0].text
 choices = response_obj.choices
-elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.ImageResponse
+):
 output = response_obj["data"]

 litellm_params = kwargs.get("litellm_params", {})
-metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None
+metadata = (
+litellm_params.get("metadata", {}) or {}
+) # if litellm_params['metadata'] == None
 metadata = self.add_metadata_from_header(litellm_params, metadata)
 clean_metadata = {}
 try:
-metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata
+metadata = copy.deepcopy(
+metadata
+) # Avoid modifying the original metadata
 except Exception:
 new_metadata = {}
 for key, value in metadata.items():
@@ -192,7 +219,9 @@ class BraintrustLogger(CustomLogger):
 project_id = metadata.get("project_id")
 if project_id is None:
 project_name = metadata.get("project_name")
-project_id = self.get_project_id_sync(project_name) if project_name else None
+project_id = (
+self.get_project_id_sync(project_name) if project_name else None
+)

 if project_id is None:
 if self.default_project_id is None:
@@ -234,7 +263,8 @@ class BraintrustLogger(CustomLogger):
 "completion_tokens": usage_obj.completion_tokens,
 "total_tokens": usage_obj.total_tokens,
 "total_cost": cost,
-"time_to_first_token": end_time.timestamp() - start_time.timestamp(),
+"time_to_first_token": end_time.timestamp()
+- start_time.timestamp(),
 "start": start_time.timestamp(),
 "end": end_time.timestamp(),
 }
@@ -255,7 +285,9 @@ class BraintrustLogger(CustomLogger):
 request_data["metrics"] = metrics

 try:
-print_verbose(f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}")
+print_verbose(
+f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
+)
 global_braintrust_sync_http_handler.post(
 url=f"{self.api_base}/project_logs/{project_id}/insert",
 json={"events": [request_data]},
@@ -276,20 +308,29 @@ class BraintrustLogger(CustomLogger):
 output = None
 choices = []
 if response_obj is not None and (
-kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse)
+kwargs.get("call_type", None) == "embedding"
+or isinstance(response_obj, litellm.EmbeddingResponse)
 ):
 output = None
-elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.ModelResponse
+):
 output = response_obj["choices"][0]["message"].json()
 choices = response_obj["choices"]
-elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.TextCompletionResponse
+):
 output = response_obj.choices[0].text
 choices = response_obj.choices
-elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
+elif response_obj is not None and isinstance(
+response_obj, litellm.ImageResponse
+):
 output = response_obj["data"]

 litellm_params = kwargs.get("litellm_params", {})
-metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None
+metadata = (
+litellm_params.get("metadata", {}) or {}
+) # if litellm_params['metadata'] == None
 metadata = self.add_metadata_from_header(litellm_params, metadata)
 clean_metadata = {}
 new_metadata = {}
@@ -313,7 +354,11 @@ class BraintrustLogger(CustomLogger):
 project_id = metadata.get("project_id")
 if project_id is None:
 project_name = metadata.get("project_name")
-project_id = await self.get_project_id_async(project_name) if project_name else None
+project_id = (
+await self.get_project_id_async(project_name)
+if project_name
+else None
+)

 if project_id is None:
 if self.default_project_id is None:
@@ -362,8 +407,14 @@ class BraintrustLogger(CustomLogger):
 api_call_start_time = kwargs.get("api_call_start_time")
 completion_start_time = kwargs.get("completion_start_time")

-if api_call_start_time is not None and completion_start_time is not None:
-metrics["time_to_first_token"] = completion_start_time.timestamp() - api_call_start_time.timestamp()
+if (
+api_call_start_time is not None
+and completion_start_time is not None
+):
+metrics["time_to_first_token"] = (
+completion_start_time.timestamp()
+- api_call_start_time.timestamp()
+)

 request_data = {
 "id": litellm_call_id,

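For context on the reformatted add_metadata_from_header logic above: any proxy request header whose name starts with "braintrust" is copied into the Braintrust metadata with that prefix stripped. A self-contained sketch of the same mapping; the header name and value below are illustrative assumptions, not values from this commit:

def braintrust_metadata_from_headers(proxy_headers: dict, metadata: dict) -> dict:
    for header_key, header_value in proxy_headers.items():
        if header_key.startswith("braintrust"):
            # strip the "braintrust" prefix once, keep the remainder as the metadata key
            trace_param_key = header_key.replace("braintrust", "", 1)
            metadata[trace_param_key] = header_value
    return metadata


print(braintrust_metadata_from_headers({"braintrust_project_name": "my-project"}, {}))
# {'_project_name': 'my-project'}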
@@ -14,7 +14,6 @@ from litellm.integrations.custom_logger import CustomLogger


 class CustomBatchLogger(CustomLogger):

 def __init__(
 self,
 flush_lock: Optional[asyncio.Lock] = None,

@@ -7,7 +7,6 @@ from litellm.types.utils import StandardLoggingGuardrailInformation


 class CustomGuardrail(CustomLogger):

 def __init__(
 self,
 guardrail_name: Optional[str] = None,

@@ -31,7 +31,7 @@ from litellm.types.utils import (
 if TYPE_CHECKING:
 from opentelemetry.trace import Span as _Span

-Span = _Span
+Span = Union[_Span, Any]
 else:
 Span = Any


@@ -233,7 +233,6 @@ class DataDogLogger(
 pass

 async def _log_async_event(self, kwargs, response_obj, start_time, end_time):

 dd_payload = self.create_datadog_logging_payload(
 kwargs=kwargs,
 response_obj=response_obj,

@@ -125,9 +125,9 @@ class GCSBucketBase(CustomBatchLogger):
 if kwargs is None:
 kwargs = {}

-standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
-kwargs.get("standard_callback_dynamic_params", None)
-)
+standard_callback_dynamic_params: Optional[
+StandardCallbackDynamicParams
+] = kwargs.get("standard_callback_dynamic_params", None)

 bucket_name: str
 path_service_account: Optional[str]

@@ -70,13 +70,14 @@ class GcsPubSubLogger(CustomBatchLogger):
 """Construct authorization headers using Vertex AI auth"""
 from litellm import vertex_chat_completion

-_auth_header, vertex_project = (
-await vertex_chat_completion._ensure_access_token_async(
+(
+_auth_header,
+vertex_project,
+) = await vertex_chat_completion._ensure_access_token_async(
 credentials=self.path_service_account_json,
 project_id=None,
 custom_llm_provider="vertex_ai",
 )
-)

 auth_header, _ = vertex_chat_completion._get_token_and_url(
 model="pub-sub",

@@ -155,11 +155,7 @@ class HumanloopLogger(CustomLogger):
 prompt_id: str,
 prompt_variables: Optional[dict],
 dynamic_callback_params: StandardCallbackDynamicParams,
-) -> Tuple[
-str,
-List[AllMessageValues],
-dict,
-]:
+) -> Tuple[str, List[AllMessageValues], dict,]:
 humanloop_api_key = dynamic_callback_params.get(
 "humanloop_api_key"
 ) or get_secret_str("HUMANLOOP_API_KEY")

@@ -471,9 +471,9 @@ class LangFuseLogger:
 # we clean out all extra litellm metadata params before logging
 clean_metadata: Dict[str, Any] = {}
 if prompt_management_metadata is not None:
-clean_metadata["prompt_management_metadata"] = (
-prompt_management_metadata
-)
+clean_metadata[
+"prompt_management_metadata"
+] = prompt_management_metadata
 if isinstance(metadata, dict):
 for key, value in metadata.items():
 # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy

@@ -19,7 +19,6 @@ else:


 class LangFuseHandler:

 @staticmethod
 def get_langfuse_logger_for_request(
 standard_callback_dynamic_params: StandardCallbackDynamicParams,
@@ -87,7 +86,9 @@ class LangFuseHandler:
 if globalLangfuseLogger is not None:
 return globalLangfuseLogger

-credentials_dict: Dict[str, Any] = (
+credentials_dict: Dict[
+str, Any
+] = (
 {}
 ) # the global langfuse logger uses Environment Variables, there are no dynamic credentials
 globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache(

@@ -172,11 +172,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
 prompt_id: str,
 prompt_variables: Optional[dict],
 dynamic_callback_params: StandardCallbackDynamicParams,
-) -> Tuple[
-str,
-List[AllMessageValues],
-dict,
-]:
+) -> Tuple[str, List[AllMessageValues], dict,]:
 return self.get_chat_completion_prompt(
 model,
 messages,

@@ -75,7 +75,6 @@ class LangsmithLogger(CustomBatchLogger):
 langsmith_project: Optional[str] = None,
 langsmith_base_url: Optional[str] = None,
 ) -> LangsmithCredentialsObject:

 _credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
 if _credentials_api_key is None:
 raise Exception(
@@ -443,9 +442,9 @@ class LangsmithLogger(CustomBatchLogger):

 Otherwise, use the default credentials.
 """
-standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
-kwargs.get("standard_callback_dynamic_params", None)
-)
+standard_callback_dynamic_params: Optional[
+StandardCallbackDynamicParams
+] = kwargs.get("standard_callback_dynamic_params", None)
 if standard_callback_dynamic_params is not None:
 credentials = self.get_credentials_from_env(
 langsmith_api_key=standard_callback_dynamic_params.get(
@@ -481,7 +480,6 @@ class LangsmithLogger(CustomBatchLogger):
 asyncio.run(self.async_send_batch())

 def get_run_by_id(self, run_id):

 langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"]

 langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"]

@@ -1,12 +1,12 @@
 import json
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Union

 from litellm.proxy._types import SpanAttributes

 if TYPE_CHECKING:
 from opentelemetry.trace import Span as _Span

-Span = _Span
+Span = Union[_Span, Any]
 else:
 Span = Any


@@ -20,7 +20,6 @@ def parse_tool_calls(tool_calls):
 return None

 def clean_tool_call(tool_call):

 serialized = {
 "type": tool_call.type,
 "id": tool_call.id,
@@ -36,7 +35,6 @@ def parse_tool_calls(tool_calls):


 def parse_messages(input):

 if input is None:
 return None


@@ -48,14 +48,17 @@ class MlflowLogger(CustomLogger):

 def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
 try:
-from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
+from mlflow.tracing.utils import set_span_chat_messages # type: ignore
+from mlflow.tracing.utils import set_span_chat_tools # type: ignore
 except ImportError:
 return

 inputs = self._construct_input(kwargs)
 input_messages = inputs.get("messages", [])
-output_messages = [c.message.model_dump(exclude_none=True)
-for c in getattr(response_obj, "choices", [])]
+output_messages = [
+c.message.model_dump(exclude_none=True)
+for c in getattr(response_obj, "choices", [])
+]
 if messages := [*input_messages, *output_messages]:
 set_span_chat_messages(span, messages)
 if tools := inputs.get("tools"):

@@ -1,7 +1,7 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast

 import litellm
 from litellm._logging import verbose_logger
@@ -23,10 +23,10 @@ if TYPE_CHECKING:
 )
 from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth

-Span = _Span
-SpanExporter = _SpanExporter
-UserAPIKeyAuth = _UserAPIKeyAuth
-ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
+Span = Union[_Span, Any]
+SpanExporter = Union[_SpanExporter, Any]
+UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any]
+ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any]
 else:
 Span = Any
 SpanExporter = Any
@@ -46,7 +46,6 @@ LITELLM_REQUEST_SPAN_NAME = "litellm_request"

 @dataclass
 class OpenTelemetryConfig:

 exporter: Union[str, SpanExporter] = "console"
 endpoint: Optional[str] = None
 headers: Optional[str] = None
@@ -154,7 +153,6 @@ class OpenTelemetry(CustomLogger):
 end_time: Optional[Union[datetime, float]] = None,
 event_metadata: Optional[dict] = None,
 ):

 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode

@@ -215,7 +213,6 @@ class OpenTelemetry(CustomLogger):
 end_time: Optional[Union[float, datetime]] = None,
 event_metadata: Optional[dict] = None,
 ):

 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode

@@ -353,9 +350,9 @@ class OpenTelemetry(CustomLogger):
 """
 from opentelemetry import trace

-standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
-kwargs.get("standard_callback_dynamic_params")
-)
+standard_callback_dynamic_params: Optional[
+StandardCallbackDynamicParams
+] = kwargs.get("standard_callback_dynamic_params")
 if not standard_callback_dynamic_params:
 return

@@ -722,7 +719,6 @@ class OpenTelemetry(CustomLogger):
 span.set_attribute(key, primitive_value)

 def set_raw_request_attributes(self, span: Span, kwargs, response_obj):

 kwargs.get("optional_params", {})
 litellm_params = kwargs.get("litellm_params", {}) or {}
 custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
@@ -843,12 +839,14 @@ class OpenTelemetry(CustomLogger):
 headers=dynamic_headers or self.OTEL_HEADERS
 )

-if isinstance(self.OTEL_EXPORTER, SpanExporter):
+if hasattr(
+self.OTEL_EXPORTER, "export"
+): # Check if it has the export method that SpanExporter requires
 verbose_logger.debug(
 "OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s",
 self.OTEL_EXPORTER,
 )
-return SimpleSpanProcessor(self.OTEL_EXPORTER)
+return SimpleSpanProcessor(cast(SpanExporter, self.OTEL_EXPORTER))

 if self.OTEL_EXPORTER == "console":
 verbose_logger.debug(
@@ -907,7 +905,6 @@ class OpenTelemetry(CustomLogger):
 logging_payload: ManagementEndpointLoggingPayload,
 parent_otel_span: Optional[Span] = None,
 ):

 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode

@@ -961,7 +958,6 @@ class OpenTelemetry(CustomLogger):
 logging_payload: ManagementEndpointLoggingPayload,
 parent_otel_span: Optional[Span] = None,
 ):

 from opentelemetry import trace
 from opentelemetry.trace import Status, StatusCode

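The OpenTelemetry hunk above replaces an isinstance() check with a duck-typed hasattr(exporter, "export") probe, because SpanExporter is now a concrete class only under TYPE_CHECKING and resolves to Any at runtime. A short sketch of the same idea using the standard SDK exporter; the choice of ConsoleSpanExporter is an assumption for illustration:

from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

exporter = ConsoleSpanExporter()

if hasattr(exporter, "export"):
    # anything exposing an export() method is treated as a usable SpanExporter
    span_processor = SimpleSpanProcessor(exporter)
else:
    raise ValueError(f"{exporter!r} does not look like a SpanExporter")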
@ -185,7 +185,6 @@ class OpikLogger(CustomBatchLogger):
|
||||||
def _create_opik_payload( # noqa: PLR0915
|
def _create_opik_payload( # noqa: PLR0915
|
||||||
self, kwargs, response_obj, start_time, end_time
|
self, kwargs, response_obj, start_time, end_time
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
|
|
||||||
# Get metadata
|
# Get metadata
|
||||||
_litellm_params = kwargs.get("litellm_params", {}) or {}
|
_litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||||
litellm_params_metadata = _litellm_params.get("metadata", {}) or {}
|
litellm_params_metadata = _litellm_params.get("metadata", {}) or {}
|
||||||
|
|
|
@ -988,9 +988,9 @@ class PrometheusLogger(CustomLogger):
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
verbose_logger.debug("setting remaining tokens requests metric")
|
verbose_logger.debug("setting remaining tokens requests metric")
|
||||||
standard_logging_payload: Optional[StandardLoggingPayload] = (
|
standard_logging_payload: Optional[
|
||||||
request_kwargs.get("standard_logging_object")
|
StandardLoggingPayload
|
||||||
)
|
] = request_kwargs.get("standard_logging_object")
|
||||||
|
|
||||||
if standard_logging_payload is None:
|
if standard_logging_payload is None:
|
||||||
return
|
return
|
||||||
|
|
@@ -14,7 +14,6 @@ class PromptManagementClient(TypedDict):


class PromptManagementBase(ABC):

    @property
    @abstractmethod
    def integration_name(self) -> str:

@@ -83,11 +82,7 @@ class PromptManagementBase(ABC):
        prompt_id: str,
        prompt_variables: Optional[dict],
        dynamic_callback_params: StandardCallbackDynamicParams,
-    ) -> Tuple[
-        str,
-        List[AllMessageValues],
-        dict,
-    ]:
+    ) -> Tuple[str, List[AllMessageValues], dict,]:
        if not self.should_run_prompt_management(
            prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
        ):
@@ -38,7 +38,7 @@ class S3Logger:
        if litellm.s3_callback_params is not None:
            # read in .env variables - example os.environ/AWS_BUCKET_NAME
            for key, value in litellm.s3_callback_params.items():
-                if type(value) is str and value.startswith("os.environ/"):
+                if isinstance(value, str) and value.startswith("os.environ/"):
                    litellm.s3_callback_params[key] = litellm.get_secret(value)
            # now set s3 params from litellm.s3_logger_params
            s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")
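The isinstance swap is the idiomatic (and lint-friendly) way to type-check here; behaviour is unchanged: any "os.environ/..." placeholder in the callback params is resolved to the underlying secret. A rough standalone sketch of the same resolution pattern, assuming a plain dict and os.environ rather than litellm.get_secret:

    import os
    from typing import Any, Dict

    def resolve_env_placeholders(params: Dict[str, Any]) -> Dict[str, Any]:
        # Values like "os.environ/AWS_BUCKET_NAME" are replaced with the env var's value.
        for key, value in params.items():
            if isinstance(value, str) and value.startswith("os.environ/"):
                env_name = value.split("/", 1)[1]
                params[key] = os.environ.get(env_name)
        return params

    # Example: resolve_env_placeholders({"s3_bucket_name": "os.environ/AWS_BUCKET_NAME"})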
@@ -21,11 +21,11 @@ try:
        # contains a (known) object attribute
        object: Literal["chat.completion", "edit", "text_completion"]

-        def __getitem__(self, key: K) -> V: ...  # noqa
+        def __getitem__(self, key: K) -> V:
+            ...  # noqa

-        def get(  # noqa
-            self, key: K, default: Optional[V] = None
-        ) -> Optional[V]: ...  # pragma: no cover
+        def get(self, key: K, default: Optional[V] = None) -> Optional[V]:  # noqa
+            ...  # pragma: no cover

class OpenAIRequestResponseResolver:
    def __call__(
@@ -10,7 +10,7 @@ from litellm.types.llms.openai import AllMessageValues
if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

-    Span = _Span
+    Span = Union[_Span, Any]
else:
    Span = Any

@@ -11,7 +11,9 @@ except (ImportError, AttributeError):
    # Old way to access resources, which setuptools deprecated some time ago
    import pkg_resources  # type: ignore

-filename = pkg_resources.resource_filename(__name__, "litellm_core_utils/tokenizers")
+filename = pkg_resources.resource_filename(
+    __name__, "litellm_core_utils/tokenizers"
+)

os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
    "CUSTOM_TIKTOKEN_CACHE_DIR", filename
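The reflowed call does not change behaviour: the bundled tokenizer directory is still used as the tiktoken cache unless CUSTOM_TIKTOKEN_CACHE_DIR is set. A minimal hedged sketch of the override (the path is illustrative), run before the module that reads the variable is imported:

    import os

    # Point the tokenizer cache at a writable directory of your choosing.
    os.environ["CUSTOM_TIKTOKEN_CACHE_DIR"] = "/tmp/tiktoken_cache"

    # Any later `os.getenv("CUSTOM_TIKTOKEN_CACHE_DIR", bundled_dir)` now resolves
    # to the override instead of the bundled tokenizer files.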
@@ -79,6 +79,22 @@ def get_supported_openai_params(  # noqa: PLR0915
    elif custom_llm_provider == "maritalk":
        return litellm.MaritalkConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "openai":
+        if request_type == "transcription":
+            transcription_provider_config = (
+                litellm.ProviderConfigManager.get_provider_audio_transcription_config(
+                    model=model, provider=LlmProviders.OPENAI
+                )
+            )
+            if isinstance(
+                transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
+            ):
+                return transcription_provider_config.get_supported_openai_params(
+                    model=model
+                )
+            else:
+                raise ValueError(
+                    f"Unsupported provider config: {transcription_provider_config} for model: {model}"
+                )
        return litellm.OpenAIConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "azure":
        if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
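With this branch, transcription requests against OpenAI get their supported-parameter list from the audio-transcription provider config instead of the chat config. A hedged usage sketch (keyword names follow the signature shown in the hunk header; the model names are just examples):

    import litellm

    # Chat params for an OpenAI chat model
    chat_params = litellm.get_supported_openai_params(
        model="gpt-4o", custom_llm_provider="openai"
    )

    # Transcription params now go through the audio-transcription config
    stt_params = litellm.get_supported_openai_params(
        model="whisper-1", custom_llm_provider="openai", request_type="transcription"
    )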
@@ -67,6 +67,7 @@ from litellm.types.utils import (
    StandardCallbackDynamicParams,
    StandardLoggingAdditionalHeaders,
    StandardLoggingHiddenParams,
+    StandardLoggingMCPToolCall,
    StandardLoggingMetadata,
    StandardLoggingModelCostFailureDebugInformation,
    StandardLoggingModelInformation,
@@ -239,9 +240,9 @@ class Logging(LiteLLMLoggingBaseClass):
        self.litellm_trace_id = litellm_trace_id
        self.function_id = function_id
        self.streaming_chunks: List[Any] = []  # for generating complete stream response
-        self.sync_streaming_chunks: List[Any] = (
-            []
-        )  # for generating complete stream response
+        self.sync_streaming_chunks: List[
+            Any
+        ] = []  # for generating complete stream response
        self.log_raw_request_response = log_raw_request_response

        # Initialize dynamic callbacks
@@ -452,11 +453,13 @@ class Logging(LiteLLMLoggingBaseClass):
        prompt_id: str,
        prompt_variables: Optional[dict],
    ) -> Tuple[str, List[AllMessageValues], dict]:
-
        custom_logger = self.get_custom_logger_for_prompt_management(model)
        if custom_logger:
-            model, messages, non_default_params = (
-                custom_logger.get_chat_completion_prompt(
+            (
+                model,
+                messages,
+                non_default_params,
+            ) = custom_logger.get_chat_completion_prompt(
                model=model,
                messages=messages,
                non_default_params=non_default_params,
@@ -464,7 +467,6 @@ class Logging(LiteLLMLoggingBaseClass):
                prompt_variables=prompt_variables,
                dynamic_callback_params=self.standard_callback_dynamic_params,
            )
-            )
            self.messages = messages
        return model, messages, non_default_params

@@ -541,12 +543,11 @@ class Logging(LiteLLMLoggingBaseClass):
            model
        ):  # if model name was changes pre-call, overwrite the initial model call name with the new one
            self.model_call_details["model"] = model
-        self.model_call_details["litellm_params"]["api_base"] = (
-            self._get_masked_api_base(additional_args.get("api_base", ""))
-        )
+        self.model_call_details["litellm_params"][
+            "api_base"
+        ] = self._get_masked_api_base(additional_args.get("api_base", ""))

    def pre_call(self, input, api_key, model=None, additional_args={}):  # noqa: PLR0915

        # Log the exact input to the LLM API
        litellm.error_logs["PRE_CALL"] = locals()
        try:
@@ -568,19 +569,16 @@ class Logging(LiteLLMLoggingBaseClass):
                self.log_raw_request_response is True
                or log_raw_request_response is True
            ):
-
                _litellm_params = self.model_call_details.get("litellm_params", {})
                _metadata = _litellm_params.get("metadata", {}) or {}
                try:
                    # [Non-blocking Extra Debug Information in metadata]
                    if turn_off_message_logging is True:
-                        _metadata["raw_request"] = (
-                            "redacted by litellm. \
+                        _metadata[
+                            "raw_request"
+                        ] = "redacted by litellm. \
'litellm.turn_off_message_logging=True'"
-                        )
                    else:
-
                        curl_command = self._get_request_curl_command(
                            api_base=additional_args.get("api_base", ""),
                            headers=additional_args.get("headers", {}),
@@ -590,8 +588,9 @@ class Logging(LiteLLMLoggingBaseClass):

                        _metadata["raw_request"] = str(curl_command)
                        # split up, so it's easier to parse in the UI
-                        self.model_call_details["raw_request_typed_dict"] = (
-                            RawRequestTypedDict(
+                        self.model_call_details[
+                            "raw_request_typed_dict"
+                        ] = RawRequestTypedDict(
                            raw_request_api_base=str(
                                additional_args.get("api_base") or ""
                            ),
@@ -604,20 +603,19 @@ class Logging(LiteLLMLoggingBaseClass):
                            ),
                            error=None,
                        )
-                        )
                except Exception as e:
-                    self.model_call_details["raw_request_typed_dict"] = (
-                        RawRequestTypedDict(
+                    self.model_call_details[
+                        "raw_request_typed_dict"
+                    ] = RawRequestTypedDict(
                        error=str(e),
                    )
-                    )
                    traceback.print_exc()
-                    _metadata["raw_request"] = (
-                        "Unable to Log \
+                    _metadata[
+                        "raw_request"
+                    ] = "Unable to Log \
raw request: {}".format(
                        str(e)
                    )
-                    )
            if self.logger_fn and callable(self.logger_fn):
                try:
                    self.logger_fn(
@@ -941,9 +939,9 @@ class Logging(LiteLLMLoggingBaseClass):
            verbose_logger.debug(
                f"response_cost_failure_debug_information: {debug_info}"
            )
-            self.model_call_details["response_cost_failure_debug_information"] = (
-                debug_info
-            )
+            self.model_call_details[
+                "response_cost_failure_debug_information"
+            ] = debug_info
            return None

        try:
@@ -968,9 +966,9 @@ class Logging(LiteLLMLoggingBaseClass):
            verbose_logger.debug(
                f"response_cost_failure_debug_information: {debug_info}"
            )
-            self.model_call_details["response_cost_failure_debug_information"] = (
-                debug_info
-            )
+            self.model_call_details[
+                "response_cost_failure_debug_information"
+            ] = debug_info

            return None

@@ -995,7 +993,6 @@ class Logging(LiteLLMLoggingBaseClass):
    def should_run_callback(
        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
    ) -> bool:

        if litellm.global_disable_no_log_param:
            return True

@@ -1027,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
        end_time = datetime.datetime.now()
        if self.completion_start_time is None:
            self.completion_start_time = end_time
-            self.model_call_details["completion_start_time"] = (
-                self.completion_start_time
-            )
+            self.model_call_details[
+                "completion_start_time"
+            ] = self.completion_start_time
        self.model_call_details["log_event_type"] = "successful_api_call"
        self.model_call_details["end_time"] = end_time
        self.model_call_details["cache_hit"] = cache_hit
@@ -1083,13 +1080,14 @@ class Logging(LiteLLMLoggingBaseClass):
                    "response_cost"
                ]
            else:
-                self.model_call_details["response_cost"] = (
-                    self._response_cost_calculator(result=result)
-                )
+                self.model_call_details[
+                    "response_cost"
+                ] = self._response_cost_calculator(result=result)
            ## STANDARDIZED LOGGING PAYLOAD

-            self.model_call_details["standard_logging_object"] = (
-                get_standard_logging_object_payload(
+            self.model_call_details[
+                "standard_logging_object"
+            ] = get_standard_logging_object_payload(
                kwargs=self.model_call_details,
                init_response_obj=result,
                start_time=start_time,
@@ -1098,11 +1096,11 @@ class Logging(LiteLLMLoggingBaseClass):
                status="success",
                standard_built_in_tools_params=self.standard_built_in_tools_params,
            )
-            )
-        elif isinstance(result, dict):  # pass-through endpoints
+        elif isinstance(result, dict) or isinstance(result, list):
            ## STANDARDIZED LOGGING PAYLOAD
-            self.model_call_details["standard_logging_object"] = (
-                get_standard_logging_object_payload(
+            self.model_call_details[
+                "standard_logging_object"
+            ] = get_standard_logging_object_payload(
                kwargs=self.model_call_details,
                init_response_obj=result,
                start_time=start_time,
@@ -1111,11 +1109,10 @@ class Logging(LiteLLMLoggingBaseClass):
                status="success",
                standard_built_in_tools_params=self.standard_built_in_tools_params,
            )
-            )
        elif standard_logging_object is not None:
-            self.model_call_details["standard_logging_object"] = (
-                standard_logging_object
-            )
+            self.model_call_details[
+                "standard_logging_object"
+            ] = standard_logging_object
        else:  # streaming chunks + image gen.
            self.model_call_details["response_cost"] = None

@@ -1154,7 +1151,6 @@ class Logging(LiteLLMLoggingBaseClass):
            standard_logging_object=kwargs.get("standard_logging_object", None),
        )
        try:
-
            ## BUILD COMPLETE STREAMED RESPONSE
            complete_streaming_response: Optional[
                Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
@@ -1172,15 +1168,16 @@ class Logging(LiteLLMLoggingBaseClass):
                verbose_logger.debug(
                    "Logging Details LiteLLM-Success Call streaming complete"
                )
-                self.model_call_details["complete_streaming_response"] = (
-                    complete_streaming_response
-                )
-                self.model_call_details["response_cost"] = (
-                    self._response_cost_calculator(result=complete_streaming_response)
-                )
+                self.model_call_details[
+                    "complete_streaming_response"
+                ] = complete_streaming_response
+                self.model_call_details[
+                    "response_cost"
+                ] = self._response_cost_calculator(result=complete_streaming_response)
                ## STANDARDIZED LOGGING PAYLOAD
-                self.model_call_details["standard_logging_object"] = (
-                    get_standard_logging_object_payload(
+                self.model_call_details[
+                    "standard_logging_object"
+                ] = get_standard_logging_object_payload(
                    kwargs=self.model_call_details,
                    init_response_obj=complete_streaming_response,
                    start_time=start_time,
@@ -1189,7 +1186,6 @@ class Logging(LiteLLMLoggingBaseClass):
                    status="success",
                    standard_built_in_tools_params=self.standard_built_in_tools_params,
                )
-                )
            callbacks = self.get_combined_callback_list(
                dynamic_success_callbacks=self.dynamic_success_callbacks,
                global_callbacks=litellm.success_callback,
@@ -1207,7 +1203,6 @@ class Logging(LiteLLMLoggingBaseClass):
            ## LOGGING HOOK ##
            for callback in callbacks:
                if isinstance(callback, CustomLogger):
-
                    self.model_call_details, result = callback.logging_hook(
                        kwargs=self.model_call_details,
                        result=result,
@@ -1538,11 +1533,11 @@ class Logging(LiteLLMLoggingBaseClass):
                        )
                    else:
                        if self.stream and complete_streaming_response:
-                            self.model_call_details["complete_response"] = (
-                                self.model_call_details.get(
+                            self.model_call_details[
+                                "complete_response"
+                            ] = self.model_call_details.get(
                                "complete_streaming_response", {}
                            )
-                            )
                            result = self.model_call_details["complete_response"]
                        openMeterLogger.log_success_event(
                            kwargs=self.model_call_details,
@@ -1581,11 +1576,11 @@ class Logging(LiteLLMLoggingBaseClass):
                        )
                    else:
                        if self.stream and complete_streaming_response:
-                            self.model_call_details["complete_response"] = (
-                                self.model_call_details.get(
+                            self.model_call_details[
+                                "complete_response"
+                            ] = self.model_call_details.get(
                                "complete_streaming_response", {}
                            )
-                            )
                            result = self.model_call_details["complete_response"]

                        callback.log_success_event(
@@ -1659,7 +1654,6 @@ class Logging(LiteLLMLoggingBaseClass):
        if self.call_type == CallTypes.aretrieve_batch.value and isinstance(
            result, LiteLLMBatch
        ):
-
            response_cost, batch_usage, batch_models = await _handle_completed_batch(
                batch=result, custom_llm_provider=self.custom_llm_provider
            )
@@ -1692,9 +1686,9 @@ class Logging(LiteLLMLoggingBaseClass):
        if complete_streaming_response is not None:
            print_verbose("Async success callbacks: Got a complete streaming response")

-            self.model_call_details["async_complete_streaming_response"] = (
-                complete_streaming_response
-            )
+            self.model_call_details[
+                "async_complete_streaming_response"
+            ] = complete_streaming_response
            try:
                if self.model_call_details.get("cache_hit", False) is True:
                    self.model_call_details["response_cost"] = 0.0
@@ -1704,11 +1698,11 @@ class Logging(LiteLLMLoggingBaseClass):
                        model_call_details=self.model_call_details
                    )
                    # base_model defaults to None if not set on model_info
-                    self.model_call_details["response_cost"] = (
-                        self._response_cost_calculator(
+                    self.model_call_details[
+                        "response_cost"
+                    ] = self._response_cost_calculator(
                        result=complete_streaming_response
                    )
-                    )

                verbose_logger.debug(
                    f"Model={self.model}; cost={self.model_call_details['response_cost']}"
@@ -1720,8 +1714,9 @@ class Logging(LiteLLMLoggingBaseClass):
                self.model_call_details["response_cost"] = None

            ## STANDARDIZED LOGGING PAYLOAD
-            self.model_call_details["standard_logging_object"] = (
-                get_standard_logging_object_payload(
+            self.model_call_details[
+                "standard_logging_object"
+            ] = get_standard_logging_object_payload(
                kwargs=self.model_call_details,
                init_response_obj=complete_streaming_response,
                start_time=start_time,
@@ -1730,7 +1725,6 @@ class Logging(LiteLLMLoggingBaseClass):
                status="success",
                standard_built_in_tools_params=self.standard_built_in_tools_params,
            )
-            )
            callbacks = self.get_combined_callback_list(
                dynamic_success_callbacks=self.dynamic_async_success_callbacks,
                global_callbacks=litellm._async_success_callback,
@@ -1935,8 +1929,9 @@ class Logging(LiteLLMLoggingBaseClass):

        ## STANDARDIZED LOGGING PAYLOAD

-        self.model_call_details["standard_logging_object"] = (
-            get_standard_logging_object_payload(
+        self.model_call_details[
+            "standard_logging_object"
+        ] = get_standard_logging_object_payload(
            kwargs=self.model_call_details,
            init_response_obj={},
            start_time=start_time,
@@ -1947,7 +1942,6 @@ class Logging(LiteLLMLoggingBaseClass):
            original_exception=exception,
            standard_built_in_tools_params=self.standard_built_in_tools_params,
        )
-        )
        return start_time, end_time

    async def special_failure_handlers(self, exception: Exception):
@@ -2084,7 +2078,6 @@ class Logging(LiteLLMLoggingBaseClass):
                        )
                        is not True
                    ):  # custom logger class
-
                        callback.log_failure_event(
                            start_time=start_time,
                            end_time=end_time,
@@ -2713,9 +2706,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
            endpoint=arize_config.endpoint,
        )

-        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
-            f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
-        )
+        os.environ[
+            "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
+        ] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
        for callback in _in_memory_loggers:
            if (
                isinstance(callback, ArizeLogger)
@@ -2739,9 +2732,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915

        # auth can be disabled on local deployments of arize phoenix
        if arize_phoenix_config.otlp_auth_headers is not None:
-            os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
-                arize_phoenix_config.otlp_auth_headers
-            )
+            os.environ[
+                "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
+            ] = arize_phoenix_config.otlp_auth_headers

        for callback in _in_memory_loggers:
            if (
@@ -2832,9 +2825,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
            exporter="otlp_http",
            endpoint="https://langtrace.ai/api/trace",
        )
-        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
-            f"api_key={os.getenv('LANGTRACE_API_KEY')}"
-        )
+        os.environ[
+            "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
+        ] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
        for callback in _in_memory_loggers:
            if (
                isinstance(callback, OpenTelemetry)
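All three hunks above only re-wrap the same assignment: the callback setup passes its auth material through the standard OTEL_EXPORTER_OTLP_TRACES_HEADERS environment variable, which OTLP exporters created afterwards pick up. A minimal hedged sketch of that mechanism (the header values are placeholders):

    import os

    # Comma-separated key=value pairs, as expected by OTLP exporters.
    os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = "space_key=YOUR_SPACE,api_key=YOUR_KEY"

    # Exporters instantiated after this point read the variable, e.g.:
    # from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    # exporter = OTLPSpanExporter(endpoint="https://example.com/v1/traces")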
@@ -3114,6 +3107,7 @@ class StandardLoggingPayloadSetup:
        litellm_params: Optional[dict] = None,
        prompt_integration: Optional[str] = None,
        applied_guardrails: Optional[List[str]] = None,
+        mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
    ) -> StandardLoggingMetadata:
        """
        Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@@ -3160,6 +3154,7 @@ class StandardLoggingPayloadSetup:
            user_api_key_end_user_id=None,
            prompt_management_metadata=prompt_management_metadata,
            applied_guardrails=applied_guardrails,
+            mcp_tool_call_metadata=mcp_tool_call_metadata,
        )
        if isinstance(metadata, dict):
            # Filter the metadata dictionary to include only the specified keys
@@ -3223,7 +3218,6 @@ class StandardLoggingPayloadSetup:
        custom_llm_provider: Optional[str],
        init_response_obj: Union[Any, BaseModel, dict],
    ) -> StandardLoggingModelInformation:
-
        model_cost_name = _select_model_name_for_cost_calc(
            model=None,
            completion_response=init_response_obj,  # type: ignore
@@ -3286,7 +3280,6 @@ class StandardLoggingPayloadSetup:
    def get_additional_headers(
        additiona_headers: Optional[dict],
    ) -> Optional[StandardLoggingAdditionalHeaders]:

        if additiona_headers is None:
            return None

@@ -3322,11 +3315,11 @@ class StandardLoggingPayloadSetup:
        for key in StandardLoggingHiddenParams.__annotations__.keys():
            if key in hidden_params:
                if key == "additional_headers":
-                    clean_hidden_params["additional_headers"] = (
-                        StandardLoggingPayloadSetup.get_additional_headers(
+                    clean_hidden_params[
+                        "additional_headers"
+                    ] = StandardLoggingPayloadSetup.get_additional_headers(
                        hidden_params[key]
                    )
-                    )
                else:
                    clean_hidden_params[key] = hidden_params[key]  # type: ignore
        return clean_hidden_params
@@ -3463,13 +3456,15 @@ def get_standard_logging_object_payload(
        )

        # cleanup timestamps
-        start_time_float, end_time_float, completion_start_time_float = (
-            StandardLoggingPayloadSetup.cleanup_timestamps(
+        (
+            start_time_float,
+            end_time_float,
+            completion_start_time_float,
+        ) = StandardLoggingPayloadSetup.cleanup_timestamps(
            start_time=start_time,
            end_time=end_time,
            completion_start_time=completion_start_time,
        )
-        )
        response_time = StandardLoggingPayloadSetup.get_response_time(
            start_time_float=start_time_float,
            end_time_float=end_time_float,
@@ -3486,6 +3481,7 @@ def get_standard_logging_object_payload(
            litellm_params=litellm_params,
            prompt_integration=kwargs.get("prompt_integration", None),
            applied_guardrails=kwargs.get("applied_guardrails", None),
+            mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
        )

        _request_body = proxy_server_request.get("body", {})
@@ -3495,7 +3491,6 @@ def get_standard_logging_object_payload(

        saved_cache_cost: float = 0.0
        if cache_hit is True:

            id = f"{id}_cache_hit{time.time()}"  # do not duplicate the request id
            saved_cache_cost = (
                logging_obj._response_cost_calculator(
@@ -3626,6 +3621,7 @@ def get_standard_logging_metadata(
        user_api_key_end_user_id=None,
        prompt_management_metadata=None,
        applied_guardrails=None,
+        mcp_tool_call_metadata=None,
    )
    if isinstance(metadata, dict):
        # Filter the metadata dictionary to include only the specified keys
@@ -3658,9 +3654,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
        ):
            for k, v in metadata["user_api_key_metadata"].items():
                if k == "logging":  # prevent logging user logging keys
-                    cleaned_user_api_key_metadata[k] = (
-                        "scrubbed_by_litellm_for_sensitive_keys"
-                    )
+                    cleaned_user_api_key_metadata[
+                        k
+                    ] = "scrubbed_by_litellm_for_sensitive_keys"
                else:
                    cleaned_user_api_key_metadata[k] = v

@@ -1,7 +1,7 @@
 # What is this?
 ## Helper utilities for cost_per_token()

-from typing import Optional, Tuple
+from typing import Optional, Tuple, cast

 import litellm
 from litellm import verbose_logger
@@ -121,6 +121,31 @@ def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
    return model_info["output_cost_per_token"]


+def calculate_cost_component(
+    model_info: ModelInfo, cost_key: str, usage_value: Optional[float]
+) -> float:
+    """
+    Generic cost calculator for any usage component
+
+    Args:
+        model_info: Dictionary containing cost information
+        cost_key: The key for the cost multiplier in model_info (e.g., 'input_cost_per_audio_token')
+        usage_value: The actual usage value (e.g., number of tokens, characters, seconds)
+
+    Returns:
+        float: The calculated cost
+    """
+    cost_per_unit = model_info.get(cost_key)
+    if (
+        cost_per_unit is not None
+        and isinstance(cost_per_unit, float)
+        and usage_value is not None
+        and usage_value > 0
+    ):
+        return float(usage_value) * cost_per_unit
+    return 0.0
+
+
def generic_cost_per_token(
    model: str, usage: Usage, custom_llm_provider: str
) -> Tuple[float, float]:
@@ -136,6 +161,7 @@ def generic_cost_per_token(
    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """

    ## GET MODEL INFO
    model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider)

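The new helper collapses the repeated "look up a per-unit price, multiply by usage if both are present" pattern into a single call; a missing price key or zero usage simply contributes 0.0. A hedged, self-contained usage sketch with a plain dict standing in for ModelInfo (the prices are invented):

    # Standalone sketch of the calculate_cost_component logic; model_info here is a
    # plain dict with illustrative prices, not litellm's ModelInfo type.
    def calculate_cost_component(model_info, cost_key, usage_value):
        cost_per_unit = model_info.get(cost_key)
        if (
            cost_per_unit is not None
            and isinstance(cost_per_unit, float)
            and usage_value is not None
            and usage_value > 0
        ):
            return float(usage_value) * cost_per_unit
        return 0.0

    model_info = {"cache_read_input_token_cost": 0.25e-6, "input_cost_per_audio_token": 1e-6}
    print(calculate_cost_component(model_info, "cache_read_input_token_cost", 1000))  # 0.00025
    print(calculate_cost_component(model_info, "input_cost_per_image", 2))            # 0.0 (key missing)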
@@ -143,38 +169,124 @@ def generic_cost_per_token(
    ### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
    prompt_cost = 0.0
    ### PROCESSING COST
-    non_cache_hit_tokens = usage.prompt_tokens
+    text_tokens = usage.prompt_tokens
    cache_hit_tokens = 0
-    if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
-        cache_hit_tokens = usage.prompt_tokens_details.cached_tokens
-        non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens
+    audio_tokens = 0
+    character_count = 0
+    image_count = 0
+    video_length_seconds = 0
+    if usage.prompt_tokens_details:
+        cache_hit_tokens = (
+            cast(
+                Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
+            )
+            or 0
+        )
+        text_tokens = (
+            cast(
+                Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
+            )
+            or 0  # default to prompt tokens, if this field is not set
+        )
+        audio_tokens = (
+            cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
+            or 0
+        )
+        character_count = (
+            cast(
+                Optional[int],
+                getattr(usage.prompt_tokens_details, "character_count", 0),
+            )
+            or 0
+        )
+        image_count = (
+            cast(Optional[int], getattr(usage.prompt_tokens_details, "image_count", 0))
+            or 0
+        )
+        video_length_seconds = (
+            cast(
+                Optional[int],
+                getattr(usage.prompt_tokens_details, "video_length_seconds", 0),
+            )
+            or 0
+        )
+
+    ## EDGE CASE - text tokens not set inside PromptTokensDetails
+    if text_tokens == 0:
+        text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens

    prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)

-    prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost
+    prompt_cost = float(text_tokens) * prompt_base_cost

-    _cache_read_input_token_cost = model_info.get("cache_read_input_token_cost")
-    if (
-        _cache_read_input_token_cost is not None
-        and usage.prompt_tokens_details
-        and usage.prompt_tokens_details.cached_tokens
-    ):
-        prompt_cost += (
-            float(usage.prompt_tokens_details.cached_tokens)
-            * _cache_read_input_token_cost
+    ### CACHE READ COST
+    prompt_cost += calculate_cost_component(
+        model_info, "cache_read_input_token_cost", cache_hit_tokens
+    )
+
+    ### AUDIO COST
+    prompt_cost += calculate_cost_component(
+        model_info, "input_cost_per_audio_token", audio_tokens
    )

    ### CACHE WRITING COST
-    _cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost")
-    if _cache_creation_input_token_cost is not None:
-        prompt_cost += (
-            float(usage._cache_creation_input_tokens) * _cache_creation_input_token_cost
+    prompt_cost += calculate_cost_component(
+        model_info,
+        "cache_creation_input_token_cost",
+        usage._cache_creation_input_tokens,
+    )
+
+    ### CHARACTER COST
+
+    prompt_cost += calculate_cost_component(
+        model_info, "input_cost_per_character", character_count
+    )
+
+    ### IMAGE COUNT COST
+    prompt_cost += calculate_cost_component(
+        model_info, "input_cost_per_image", image_count
+    )
+
+    ### VIDEO LENGTH COST
+    prompt_cost += calculate_cost_component(
+        model_info, "input_cost_per_video_per_second", video_length_seconds
    )

    ## CALCULATE OUTPUT COST
    completion_base_cost = _get_completion_token_base_cost(
        model_info=model_info, usage=usage
    )
-    completion_cost = usage["completion_tokens"] * completion_base_cost
+    text_tokens = usage.completion_tokens
+    audio_tokens = 0
+    if usage.completion_tokens_details is not None:
+        audio_tokens = (
+            cast(
+                Optional[int],
+                getattr(usage.completion_tokens_details, "audio_tokens", 0),
+            )
+            or 0
+        )
+        text_tokens = (
+            cast(
+                Optional[int],
+                getattr(usage.completion_tokens_details, "text_tokens", None),
+            )
+            or usage.completion_tokens  # default to completion tokens, if this field is not set
+        )
+
+    ## TEXT COST
+    completion_cost = float(text_tokens) * completion_base_cost
+
+    _output_cost_per_audio_token: Optional[float] = model_info.get(
+        "output_cost_per_audio_token"
+    )
+
+    ## AUDIO COST
+    if (
+        _output_cost_per_audio_token is not None
+        and audio_tokens is not None
+        and audio_tokens > 0
+    ):
+        completion_cost += float(audio_tokens) * _output_cost_per_audio_token

    return prompt_cost, completion_cost
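With the rewrite above, the prompt-side cost becomes a plain sum of components: text tokens at the base prompt rate, plus cached, audio, character, image and video components whenever the model defines a price for them. A small hedged arithmetic sketch (prices and token counts are invented, not from any model map):

    # Hypothetical per-unit prices and usage, to show how the components add up.
    base_prompt_price = 2.0e-6          # USD per non-cached text token
    cache_read_price = 0.5e-6           # USD per cached token
    audio_price = 10.0e-6               # USD per audio input token

    text_tokens = 900                   # prompt_tokens minus cached/audio tokens
    cache_hit_tokens = 100
    audio_tokens = 0

    prompt_cost = (
        text_tokens * base_prompt_price
        + cache_hit_tokens * cache_read_price
        + audio_tokens * audio_price
    )
    print(prompt_cost)  # 0.00185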
Some files were not shown because too many files have changed in this diff