Merge branch 'main' into litellm_sagemaker_fix_stream

This commit is contained in:
Ishaan Jaff 2025-03-31 14:22:20 -07:00
commit 83ba96b8c6
452 changed files with 13927 additions and 3613 deletions

View file

@ -3,6 +3,18 @@ orbs:
codecov: codecov/codecov@4.0.1 codecov: codecov/codecov@4.0.1
node: circleci/node@5.1.0 # Add this line to declare the node orb node: circleci/node@5.1.0 # Add this line to declare the node orb
commands:
setup_google_dns:
steps:
- run:
name: "Configure Google DNS"
command: |
# Backup original resolv.conf
sudo cp /etc/resolv.conf /etc/resolv.conf.backup
# Add both local and Google DNS servers
echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
jobs: jobs:
local_testing: local_testing:
@ -15,7 +27,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -134,7 +146,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -234,7 +246,13 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run:
name: DNS lookup for Redis host
command: |
sudo apt-get update
sudo apt-get install -y dnsutils
dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -334,6 +352,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -388,6 +407,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -429,6 +449,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -479,7 +500,13 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- run:
name: Install PostgreSQL
command: |
sudo apt-get update
sudo apt-get install postgresql postgresql-contrib
echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
- setup_google_dns
- run: - run:
name: Show git commit hash name: Show git commit hash
command: | command: |
@ -534,6 +561,7 @@ jobs:
pip install "diskcache==5.6.1" pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0" pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0" pip install "jsonschema==4.22.0"
pip install "pytest-postgresql==7.0.1"
- save_cache: - save_cache:
paths: paths:
- ./venv - ./venv
@ -569,7 +597,7 @@ jobs:
- litellm_proxy_unit_tests_coverage - litellm_proxy_unit_tests_coverage
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
docker: docker:
- image: cimg/python:3.11 - image: cimg/python:3.13.1
auth: auth:
username: ${DOCKERHUB_USERNAME} username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD} password: ${DOCKERHUB_PASSWORD}
@ -577,6 +605,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -618,6 +647,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -654,6 +684,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -696,6 +727,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -740,6 +772,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -782,6 +815,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -828,6 +862,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -872,6 +907,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -918,6 +954,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -960,6 +997,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1002,6 +1040,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1048,6 +1087,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1080,6 +1120,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1104,6 +1145,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
# Install Helm # Install Helm
- run: - run:
name: Install Helm name: Install Helm
@ -1173,6 +1215,7 @@ jobs:
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Dependencies name: Install Dependencies
command: | command: |
@ -1209,6 +1252,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Python 3.9 name: Install Python 3.9
command: | command: |
@ -1283,6 +1327,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1418,6 +1463,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1542,6 +1588,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1704,6 +1751,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1815,6 +1863,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -1897,6 +1946,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
# Remove Docker CLI installation since it's already available in machine executor # Remove Docker CLI installation since it's already available in machine executor
- run: - run:
name: Install Python 3.13 name: Install Python 3.13
@ -1994,6 +2044,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Install Docker CLI (In case it's not already installed) name: Install Docker CLI (In case it's not already installed)
command: | command: |
@ -2039,6 +2090,8 @@ jobs:
pip install "google-cloud-aiplatform==1.59.0" pip install "google-cloud-aiplatform==1.59.0"
pip install "anthropic==0.49.0" pip install "anthropic==0.49.0"
pip install "langchain_mcp_adapters==0.0.5" pip install "langchain_mcp_adapters==0.0.5"
pip install "langchain_openai==0.2.1"
pip install "langgraph==0.3.18"
# Run pytest and generate JUnit XML report # Run pytest and generate JUnit XML report
- run: - run:
name: Build Docker image name: Build Docker image
@ -2251,6 +2304,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build UI name: Build UI
command: | command: |
@ -2365,6 +2419,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build Docker image name: Build Docker image
command: | command: |
@ -2387,6 +2442,7 @@ jobs:
working_directory: ~/project working_directory: ~/project
steps: steps:
- checkout - checkout
- setup_google_dns
- run: - run:
name: Build Docker image name: Build Docker image
command: | command: |

206
.github/workflows/publish-migrations.yml vendored Normal file
View file

@ -0,0 +1,206 @@
name: Publish Prisma Migrations
permissions:
contents: write
pull-requests: write
on:
push:
paths:
- 'schema.prisma' # Check root schema.prisma
branches:
- main
jobs:
publish-migrations:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:14
env:
POSTGRES_DB: temp_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
# Add shadow database service
postgres_shadow:
image: postgres:14
env:
POSTGRES_DB: shadow_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5433:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install Dependencies
run: |
pip install prisma
pip install python-dotenv
- name: Generate Initial Migration if None Exists
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
mkdir -p deploy/migrations
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
echo "No existing migrations found, creating baseline..."
VERSION=$(date +%Y%m%d%H%M%S)
mkdir -p deploy/migrations/${VERSION}_initial
echo "Generating initial migration..."
# Save raw output for debugging
prisma migrate diff \
--from-empty \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Raw migration file content:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Cleaning migration file..."
# Clean the file
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
# Verify the migration file
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
echo "ERROR: Migration file is empty after cleaning"
echo "Original content was:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
exit 1
fi
echo "Final migration file content:"
cat deploy/migrations/${VERSION}_initial/migration.sql
# Verify it starts with SQL
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
echo "ERROR: Migration file does not start with SQL command or comment"
echo "First line is:"
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
echo "Full content is:"
cat deploy/migrations/${VERSION}_initial/migration.sql
exit 1
fi
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
fi
- name: Compare and Generate Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create temporary migration workspace
mkdir -p temp_migrations
# Copy existing migrations (will not fail if directory is empty)
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
VERSION=$(date +%Y%m%d%H%M%S)
# Generate diff against existing migrations or empty state
prisma migrate diff \
--from-migrations temp_migrations \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > temp_migrations/migration_${VERSION}.sql
# Check if there are actual changes
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
echo "Changes detected, creating new migration"
mkdir -p deploy/migrations/${VERSION}_schema_update
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
else
echo "No schema changes detected"
exit 0
fi
- name: Verify Migration
if: success()
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
# Create test database
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
# Apply all migrations in order to verify
for migration in deploy/migrations/*/migration.sql; do
echo "Applying migration: $migration"
psql "${SHADOW_DATABASE_URL}" -f $migration
done
# Add this step before create-pull-request to debug permissions
- name: Check Token Permissions
run: |
echo "Checking token permissions..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm/collaborators
echo "\nChecking if token can create PRs..."
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/BerriAI/litellm
# Add this debug step before git push
- name: Debug Changed Files
run: |
echo "Files staged for commit:"
git diff --name-status --staged
echo "\nAll changed files:"
git status
- name: Create Pull Request
if: success()
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: "chore: update prisma migrations"
title: "Update Prisma Migrations"
body: |
Auto-generated migration based on schema.prisma changes.
Generated files:
- deploy/migrations/${VERSION}_schema_update/migration.sql
- deploy/migrations/${VERSION}_schema_update/README.md
branch: feat/prisma-migration-${{ env.VERSION }}
base: main
delete-branch: true
- name: Generate and Save Migrations
run: |
# Only add migration files
git add deploy/migrations/
git status # Debug what's being committed
git commit -m "chore: update prisma migrations"

53
.github/workflows/test-linting.yml vendored Normal file
View file

@ -0,0 +1,53 @@
name: LiteLLM Linting
on:
pull_request:
branches: [ main ]
jobs:
lint:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev
- name: Run Black formatting check
run: |
cd litellm
poetry run black . --check
cd ..
- name: Run Ruff linting
run: |
cd litellm
poetry run ruff check .
cd ..
- name: Run MyPy type checking
run: |
cd litellm
poetry run mypy . --ignore-missing-imports
cd ..
- name: Check for circular imports
run: |
cd litellm
poetry run python ../tests/documentation_tests/test_circular_imports.py
cd ..
- name: Check import safety
run: |
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)

35
.github/workflows/test-litellm.yml vendored Normal file
View file

@ -0,0 +1,35 @@
name: LiteLLM Mock Tests (folder - tests/litellm)
on:
pull_request:
branches: [ main ]
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: |
poetry install --with dev,proxy-dev --extras proxy
poetry run pip install pytest-xdist
- name: Run tests
run: |
poetry run pytest tests/litellm -x -vv -n 4

1
.gitignore vendored
View file

@ -83,4 +83,5 @@ tests/llm_translation/test_vertex_key.json
litellm/proxy/migrations/0_init/migration.sql litellm/proxy/migrations/0_init/migration.sql
litellm/proxy/db/migrations/0_init/migration.sql litellm/proxy/db/migrations/0_init/migration.sql
litellm/proxy/db/migrations/* litellm/proxy/db/migrations/*
litellm/proxy/migrations/*config.yaml
litellm/proxy/migrations/* litellm/proxy/migrations/*

View file

@ -6,44 +6,35 @@ repos:
entry: pyright entry: pyright
language: system language: system
types: [python] types: [python]
files: ^litellm/ files: ^(litellm/|litellm_proxy_extras/)
- id: isort - id: isort
name: isort name: isort
entry: isort entry: isort
language: system language: system
types: [python] types: [python]
files: litellm/.*\.py files: (litellm/|litellm_proxy_extras/).*\.py
exclude: ^litellm/__init__.py$ exclude: ^litellm/__init__.py$
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black - id: black
name: black
entry: poetry run black
language: system
types: [python]
files: (litellm/|litellm_proxy_extras/).*\.py
- repo: https://github.com/pycqa/flake8 - repo: https://github.com/pycqa/flake8
rev: 7.0.0 # The version of flake8 to use rev: 7.0.0 # The version of flake8 to use
hooks: hooks:
- id: flake8 - id: flake8
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/ exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
additional_dependencies: [flake8-print] additional_dependencies: [flake8-print]
files: litellm/.*\.py files: (litellm/|litellm_proxy_extras/).*\.py
# - id: flake8
# name: flake8 (router.py function length)
# files: ^litellm/router\.py$
# args: [--max-function-length=40]
# # additional_dependencies: [flake8-functions]
- repo: https://github.com/python-poetry/poetry - repo: https://github.com/python-poetry/poetry
rev: 1.8.0 rev: 1.8.0
hooks: hooks:
- id: poetry-check - id: poetry-check
files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
- repo: local - repo: local
hooks: hooks:
- id: check-files-match - id: check-files-match
name: Check if files match name: Check if files match
entry: python3 ci_cd/check_files_match.py entry: python3 ci_cd/check_files_match.py
language: system language: system
# - id: check-file-length
# name: Check file length
# entry: python check_file_length.py
# args: ["10000"] # set your desired maximum number of lines
# language: python
# files: litellm/.*\.py
# exclude: ^litellm/tests/

View file

@ -14,6 +14,9 @@ help:
install-dev: install-dev:
poetry install --with dev poetry install --with dev
install-proxy-dev:
poetry install --with dev,proxy-dev
lint: install-dev lint: install-dev
poetry run pip install types-requests types-setuptools types-redis types-PyYAML poetry run pip install types-requests types-setuptools types-redis types-PyYAML
cd litellm && poetry run mypy . --ignore-missing-imports cd litellm && poetry run mypy . --ignore-missing-imports

View file

@ -16,9 +16,6 @@
<a href="https://pypi.org/project/litellm/" target="_blank"> <a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version"> <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
</a> </a>
<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
<img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
</a>
<a href="https://www.ycombinator.com/companies/berriai"> <a href="https://www.ycombinator.com/companies/berriai">
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23"> <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
</a> </a>

60
ci_cd/baseline_db.py Normal file
View file

@ -0,0 +1,60 @@
import subprocess
from pathlib import Path
from datetime import datetime
def create_baseline():
"""Create baseline migration in deploy/migrations"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
deploy_dir = root_dir / "deploy"
migrations_dir = deploy_dir / "migrations"
schema_path = root_dir / "schema.prisma"
# Create migrations directory
migrations_dir.mkdir(parents=True, exist_ok=True)
# Create migration_lock.toml if it doesn't exist
lock_file = migrations_dir / "migration_lock.toml"
if not lock_file.exists():
lock_file.write_text('provider = "postgresql"\n')
# Create timestamp-based migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_dir = migrations_dir / f"{timestamp}_baseline"
migration_dir.mkdir(parents=True, exist_ok=True)
# Generate migration SQL
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-empty",
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created baseline migration in {migration_dir}")
return True
except subprocess.CalledProcessError as e:
print(f"Error running prisma command: {e.stderr}")
return False
except Exception as e:
print(f"Error creating baseline migration: {str(e)}")
return False
if __name__ == "__main__":
create_baseline()
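If it helps, the helper above can also be wrapped in a one-line CI entry point (a hypothetical wrapper; the import path assumes `ci_cd` is importable from the repo root):

```python
# Hypothetical CI wrapper: fail the job when the baseline migration
# could not be generated by create_baseline() above.
from ci_cd.baseline_db import create_baseline

if __name__ == "__main__":
    raise SystemExit(0 if create_baseline() else 1)
```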

View file

@ -0,0 +1,19 @@
#!/bin/bash
# Exit on error
set -e
echo "🚀 Building and publishing litellm-proxy-extras"
# Navigate to litellm-proxy-extras directory
cd "$(dirname "$0")/../litellm-proxy-extras"
# Build the package
echo "📦 Building package..."
poetry build
# Publish to PyPI
echo "🌎 Publishing to PyPI..."
poetry publish
echo "✅ Done! Package published successfully"

95
ci_cd/run_migration.py Normal file
View file

@ -0,0 +1,95 @@
import os
import subprocess
from pathlib import Path
from datetime import datetime
import testing.postgresql
import shutil
def create_migration(migration_name: str = None):
"""
Create a new migration SQL file in the migrations directory by comparing
current database state with schema
Args:
migration_name (str): Name for the migration
"""
try:
# Get paths
root_dir = Path(__file__).parent.parent
migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
schema_path = root_dir / "schema.prisma"
# Create temporary PostgreSQL database
with testing.postgresql.Postgresql() as postgresql:
db_url = postgresql.url()
# Create temporary migrations directory next to schema.prisma
temp_migrations_dir = schema_path.parent / "migrations"
try:
# Copy existing migrations to temp directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
shutil.copytree(migrations_dir, temp_migrations_dir)
# Apply existing migrations to temp database
os.environ["DATABASE_URL"] = db_url
subprocess.run(
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
check=True,
)
# Generate diff between current database and schema
result = subprocess.run(
[
"prisma",
"migrate",
"diff",
"--from-url",
db_url,
"--to-schema-datamodel",
str(schema_path),
"--script",
],
capture_output=True,
text=True,
check=True,
)
if result.stdout.strip():
# Generate timestamp and create migration directory
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
migration_name = migration_name or "unnamed_migration"
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
migration_dir.mkdir(parents=True, exist_ok=True)
# Write the SQL to migration.sql
migration_file = migration_dir / "migration.sql"
migration_file.write_text(result.stdout)
print(f"Created migration in {migration_dir}")
return True
else:
print("No schema changes detected. Migration not needed.")
return False
finally:
# Clean up: remove temporary migrations directory
if temp_migrations_dir.exists():
shutil.rmtree(temp_migrations_dir)
except subprocess.CalledProcessError as e:
print(f"Error generating migration: {e.stderr}")
return False
except Exception as e:
print(f"Error creating migration: {str(e)}")
return False
if __name__ == "__main__":
# If running directly, can optionally pass migration name as argument
import sys
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
create_migration(migration_name)

View file

@ -1,5 +1,35 @@
version: "3.11" version: "3.11"
services: services:
litellm:
build:
context: .
args:
target: runtime
image: ghcr.io/berriai/litellm:main-stable
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml <<- this is missing in the docker-compose file currently
# command:
# - "--config=/app/config.yaml"
##############################################
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
- .env # Load local .env file
depends_on:
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
healthcheck: # Defines the health check configuration for the container
test: [ "CMD", "curl", "-f", "http://localhost:4000/health/liveliness || exit 1" ] # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
db: db:
image: postgres:16 image: postgres:16
restart: always restart: always
@ -16,3 +46,23 @@ services:
interval: 1s interval: 1s
timeout: 5s timeout: 5s
retries: 10 retries: 10
prometheus:
image: prom/prometheus
volumes:
- prometheus_data:/prometheus
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=15d'
restart: always
volumes:
prometheus_data:
driver: local
postgres_data:
name: litellm_postgres_data # Named volume for Postgres data persistence
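A minimal readiness probe against the proxy service defined above, assuming the default `4000:4000` port mapping (a sketch, not part of the compose file):

```python
# Sketch: poll the proxy's liveliness endpoint until it responds, or time out.
import time
import urllib.request

def wait_for_proxy(url: str = "http://localhost:4000/health/liveliness", timeout: int = 120) -> bool:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass
        time.sleep(3)
    return False

if __name__ == "__main__":
    print("proxy ready" if wait_for_proxy() else "proxy did not become ready")
```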

View file

@ -4,21 +4,177 @@ import Image from '@theme/IdealImage';
# /mcp [BETA] - Model Context Protocol # /mcp [BETA] - Model Context Protocol
Use Model Context Protocol with LiteLLM ## Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your `mcp_servers` with LiteLLM and all your clients can list and call available tools.
<Image <Image
img={require('../img/litellm_mcp.png')} img={require('../img/mcp_2.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}} style={{width: '100%', display: 'block', margin: '2rem auto'}}
/> />
<p style={{textAlign: 'left', color: '#666'}}> <p style={{textAlign: 'left', color: '#666'}}>
LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
</p> </p>
#### How it works
## Overview LiteLLM exposes the following MCP endpoints:
LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP - `/mcp/tools/list` - List all available tools
- `/mcp/tools/call` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes MCP tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate MCP server
7. LiteLLM returns the tool call results to the MCP client
#### Usage
#### 1. Define your tools under `mcp_servers` in your config.yaml file.
LiteLLM allows you to define your tools on the `mcp_servers` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml title="config.yaml" showLineNumbers
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}
```
#### 2. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
```shell title="Docker Run" showLineNumbers
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell title="litellm pip" showLineNumbers
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 3. Make an LLM API request
In this example we will do the following:
1. Use MCP client to list MCP tools on LiteLLM Proxy
2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
3. Provide the MCP tools to `gpt-4o`
4. Handle tool call from `gpt-4o`
5. Convert OpenAI tool call to MCP tool call
6. Execute tool call on MCP server
```python title="MCP Client List Tools" showLineNumbers
import asyncio
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionUserMessageParam
from mcp import ClientSession
from mcp.client.sse import sse_client
from litellm.experimental_mcp_client.tools import (
transform_mcp_tool_to_openai_tool,
transform_openai_tool_call_request_to_mcp_tool_call_request,
)
async def main():
# Initialize clients
# point OpenAI client to LiteLLM Proxy
client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
# Point MCP client to LiteLLM Proxy
async with sse_client("http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
# 1. List MCP tools on LiteLLM Proxy
mcp_tools = await session.list_tools()
print("List of MCP tools for MCP server:", mcp_tools.tools)
# Create message
messages = [
ChatCompletionUserMessageParam(
content="Send an email about LiteLLM supporting MCP", role="user"
)
]
# 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
# Since OpenAI only supports tools in the OpenAI format, we need to convert the MCP tools to the OpenAI format.
openai_tools = [
transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
]
# 3. Provide the MCP tools to `gpt-4o`
response = await client.chat.completions.create(
model="gpt-4o",
messages=messages,
tools=openai_tools,
tool_choice="auto",
)
# 4. Handle tool call from `gpt-4o`
if response.choices[0].message.tool_calls:
tool_call = response.choices[0].message.tool_calls[0]
if tool_call:
# 5. Convert OpenAI tool call to MCP tool call
# Since MCP servers expect tools in the MCP format, we need to convert the OpenAI tool call to the MCP format.
# This is done using litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request
mcp_call = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=tool_call.model_dump()
)
)
# 6. Execute tool call on MCP server
result = await session.call_tool(
name=mcp_call.name, arguments=mcp_call.arguments
)
print("Result:", result)
# Run it
asyncio.run(main())
```
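The example above stops after executing the tool. If you want the model to produce a final answer from the tool output, a possible continuation (not part of the original example) is sketched below; it reuses the `client`, `messages`, `tool_call`, and `result` objects from the script, plus `response.choices[0].message` as the assistant turn:

```python
# Sketch: feed the MCP tool result back to the model so it can answer the user.
# The arguments are the objects produced in the example above.
import json
from openai import AsyncOpenAI

async def answer_with_tool_result(client: AsyncOpenAI, messages, assistant_message, tool_call, result) -> str:
    followup = messages + [
        assistant_message.model_dump(),  # the assistant turn that requested the tool call
        {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result.model_dump(), default=str),
        },
    ]
    final = await client.chat.completions.create(model="gpt-4o", messages=followup)
    return final.choices[0].message.content
```

Inside the session block this could be called as `print(await answer_with_tool_result(client, messages, response.choices[0].message, tool_call, result))`.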
## LiteLLM Python SDK MCP Bridge
LiteLLM Python SDK acts as an MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
- **List** Available MCP Tools: OpenAI clients can view all available MCP tools - **List** Available MCP Tools: OpenAI clients can view all available MCP tools
- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools - `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
@ -26,8 +182,6 @@ LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported mod
- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server - `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server
## Usage
### 1. List Available MCP Tools ### 1. List Available MCP Tools
In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways: In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
@ -271,215 +425,3 @@ async with stdio_client(server_params) as (read, write):
</TabItem> </TabItem>
</Tabs> </Tabs>
## Upcoming Features
:::info
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
:::
### Expose MCP tools on LiteLLM Proxy Server
This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools.
#### How it works
LiteLLM exposes the following MCP endpoints:
- `/mcp/list_tools` - List all available tools
- `/mcp/call_tool` - Call a specific tool with the provided arguments
When MCP clients connect to LiteLLM they can follow this workflow:
1. Connect to the LiteLLM MCP server
2. List all available tools on LiteLLM
3. Client makes LLM API request with tool call(s)
4. LLM API returns which tools to call and with what arguments
5. MCP client makes tool calls to LiteLLM
6. LiteLLM makes the tool calls to the appropriate handlers
7. LiteLLM returns the tool call results to the MCP client
#### Usage
#### 1. Define your tools on mcp_tools
LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
```yaml
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_tools:
- name: "get_current_time"
description: "Get the current time"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short"]
}
}
}
handler: "mcp_tools.get_current_time"
```
#### 2. Define a handler for your tool
Create a new file called `mcp_tools.py` and add this code. The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called.
```python
# mcp_tools.py
from datetime import datetime
def get_current_time(format: str = "short"):
"""
Simple handler for the 'get_current_time' tool.
Args:
format (str): The format of the time to return ('short').
Returns:
str: The current time formatted as 'HH:MM'.
"""
# Get the current time
current_time = datetime.now()
# Format the time as 'HH:MM'
return current_time.strftime('%H:%M')
```
#### 3. Start LiteLLM Gateway
<Tabs>
<TabItem value="docker" label="Docker Run">
Mount your `mcp_tools.py` on the LiteLLM Docker container.
```shell
docker run -d \
-p 4000:4000 \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
--name my-app \
-v $(pwd)/my_config.yaml:/app/config.yaml \
-v $(pwd)/mcp_tools.py:/app/mcp_tools.py \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
```
</TabItem>
<TabItem value="py" label="litellm pip">
```shell
litellm --config config.yaml --detailed_debug
```
</TabItem>
</Tabs>
#### 4. Make an LLM API request
```python
import asyncio
from langchain_mcp_adapters.tools import load_mcp_tools
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from mcp import ClientSession
from mcp.client.sse import sse_client
async def main():
# Initialize the model with your API key
model = ChatOpenAI(model="gpt-4o")
# Connect to the MCP server
async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
async with ClientSession(read, write) as session:
# Initialize the session
print("Initializing session...")
await session.initialize()
print("Session initialized")
# Load available tools from MCP
print("Loading tools...")
tools = await load_mcp_tools(session)
print(f"Loaded {len(tools)} tools")
# Create a ReAct agent with the model and tools
agent = create_react_agent(model, tools)
# Run the agent with a user query
user_query = "What's the weather in Tokyo?"
print(f"Asking: {user_query}")
agent_response = await agent.ainvoke({"messages": user_query})
print("Agent response:")
print(agent_response)
if __name__ == "__main__":
asyncio.run(main())
```
### Specification for `mcp_tools`
The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients.
#### Tool Definition Format
```yaml
mcp_tools:
- name: string # Required: Name of the tool
description: string # Required: Description of what the tool does
input_schema: object # Required: JSON Schema defining the tool's input parameters
handler: string # Required: Path to the function that implements the tool
```
#### Field Details
- `name`: A unique identifier for the tool
- `description`: A clear description of what the tool does, used by LLMs to determine when to call it
- `input_schema`: JSON Schema object defining the expected input parameters
- `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name")
#### Example Tool Definition
```yaml
mcp_tools:
- name: "get_current_time"
description: "Get the current time in a specified format"
input_schema: {
"type": "object",
"properties": {
"format": {
"type": "string",
"description": "The format of the time to return",
"enum": ["short", "long", "iso"]
},
"timezone": {
"type": "string",
"description": "The timezone to use (e.g., 'UTC', 'America/New_York')",
"default": "UTC"
}
},
"required": ["format"]
}
handler: "mcp_tools.get_current_time"
```

View file

@ -664,6 +664,58 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem> </TabItem>
</Tabs> </Tabs>
## Usage - Latency Optimized Inference
Valid from v1.65.1+
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
messages=[{"role": "user", "content": "What is the capital of France?"}],
performanceConfig={"latency": "optimized"},
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: bedrock-claude-3-7
litellm_params:
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
performanceConfig: {"latency": "optimized"} # 👈 EITHER HERE OR ON REQUEST
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "bedrock-claude-3-7",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"performanceConfig": {"latency": "optimized"} # 👈 EITHER HERE OR ON CONFIG.YAML
}'
```
</TabItem>
</Tabs>
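The same proxy request can also be issued from Python with the OpenAI SDK. Forwarding `performanceConfig` through `extra_body` is an assumption here, based on how other provider-specific params are passed to the proxy:

```python
# Sketch: call the LiteLLM proxy and forward the Bedrock performanceConfig.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="bedrock-claude-3-7",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    extra_body={"performanceConfig": {"latency": "optimized"}},  # assumed passthrough
)
print(response.choices[0].message.content)
```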
## Usage - Bedrock Guardrails ## Usage - Bedrock Guardrails
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html) Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
@ -1776,6 +1828,7 @@ response = completion(
) )
``` ```
</TabItem> </TabItem>
<TabItem value="proxy" label="PROXY"> <TabItem value="proxy" label="PROXY">
1. Setup config.yaml 1. Setup config.yaml
@ -1820,11 +1873,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
``` ```
</TabItem> </TabItem>
</Tabs> </Tabs>
### SSO Login (AWS Profile) ### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable - Set `AWS_PROFILE` environment variable
- Make bedrock completion call - Make bedrock completion call
```python ```python
import os import os
from litellm import completion from litellm import completion
@ -1917,12 +1972,46 @@ model_list:
</Tabs> </Tabs>
Text to Image:
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter"
}'
```
Color Guided Generation:
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter",
"taskType": "COLOR_GUIDED_GENERATION",
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
}'
```
| Model Name | Function Call |
|-------------------------|---------------------------------------------|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
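For the SDK route, a minimal Nova Canvas text-to-image call would look roughly like this (a sketch; it assumes AWS credentials and region are already set in the environment, and that the extra Nova Canvas params shown in the curl examples pass through as kwargs):

```python
# Sketch: Amazon Nova Canvas image generation via the LiteLLM SDK.
from litellm import image_generation

response = image_generation(
    model="bedrock/amazon.nova-canvas-v1:0",
    prompt="A cute baby sea otter",
    # taskType="COLOR_GUIDED_GENERATION",                   # assumed kwarg passthrough
    # colorGuidedGenerationParams={"colors": ["#FFFFFF"]},  # assumed kwarg passthrough
)
print(response.data[0])  # contains the generated image (b64 or url)
```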
### Passing an external BedrockRuntime.Client as a parameter - Completion() ### Passing an external BedrockRuntime.Client as a parameter - Completion()
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
:::warning :::warning
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
Experimental - 2024-Jun-23: Experimental - 2024-Jun-23:

View file

@ -589,8 +589,10 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the audio."}, {"type": "text", "text": "Please summarize the audio."},
{ {
"type": "image_url", "type": "file",
"image_url": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA "file": {
"file_data": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
}
}, },
], ],
} }
@ -640,8 +642,11 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the file."}, {"type": "text", "text": "Please summarize the file."},
{ {
"type": "image_url", "type": "file",
"image_url": "https://storage..." # 👈 SET THE IMG URL "file": {
"file_id": "https://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
}, },
], ],
} }
@ -668,8 +673,11 @@ response = litellm.completion(
"content": [ "content": [
{"type": "text", "text": "Please summarize the file."}, {"type": "text", "text": "Please summarize the file."},
{ {
"type": "image_url", "type": "file",
"image_url": "gs://..." # 👈 SET THE cloud storage bucket url "file": {
"file_id": "gs://storage...", # 👈 SET THE IMG URL
"format": "application/pdf" # OPTIONAL
}
}, },
], ],
} }

View file

@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` | | fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
## OpenAI Audio Transcription
LiteLLM supports OpenAI Audio Transcription endpoint.
Supported models:
| Model Name | Function Call |
|---------------------------|-----------------------------------------------------------------|
| `whisper-1` | `response = completion(model="whisper-1", file=audio_file)` |
| `gpt-4o-transcribe` | `response = completion(model="gpt-4o-transcribe", file=audio_file)` |
| `gpt-4o-mini-transcribe` | `response = completion(model="gpt-4o-mini-transcribe", file=audio_file)` |
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import transcription
import os
# set api keys
os.environ["OPENAI_API_KEY"] = ""
audio_file = open("/path/to/audio.mp3", "rb")
response = transcription(model="gpt-4o-transcribe", file=audio_file)
print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gpt-4o-transcribe
litellm_params:
model: gpt-4o-transcribe
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: audio_transcription
general_settings:
master_key: sk-1234
```
2. Start the proxy
```bash
litellm --config config.yaml
```
3. Test it!
```bash
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
--form 'model="gpt-4o-transcribe"'
```
</TabItem>
</Tabs>
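The same proxy request can be made with the OpenAI Python SDK pointed at LiteLLM (a sketch; it assumes the `gpt-4o-transcribe` model name from the config.yaml above and the same host/port as the curl example):

```python
# Sketch: audio transcription through the LiteLLM proxy using the OpenAI SDK.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000")

with open("/path/to/audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )
print(transcript.text)
```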
## Advanced ## Advanced
### Getting OpenAI API Response Headers ### Getting OpenAI API Response Headers

View file

@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs> </Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
You can call fine-tuned Vertex AI Gemini models through LiteLLM
| Property | Details |
|----------|---------|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
```python title="Model parameter for calling fine-tuned gemini models"
model="vertex_ai/gemini/<your-finetuned-model>"
```
<Tabs>
<TabItem value="sdk" label="LiteLLM Python SDK">
```python showLineNumbers title="Example"
import litellm
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = litellm.completion(
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy">
1. Add Vertex Credentials to your env
```bash title="Authenticate to Vertex AI"
gcloud auth application-default login
```
2. Setup config.yaml
```yaml showLineNumbers title="Add to litellm config"
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/gemini/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
```
3. Test it!
<Tabs>
<TabItem value="openai" label="OpenAI Python SDK">
```python showLineNumbers title="Example request"
from openai import OpenAI
client = OpenAI(
api_key="your-litellm-key",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="finetuned-gemini",
messages=[
{"role": "user", "content": "hi"}
]
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```bash showLineNumbers title="Example request"
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Model Garden ## Model Garden
:::tip :::tip
@ -1479,67 +1576,6 @@ response = completion(
</Tabs> </Tabs>
## Gemini Pro
| Model Name | Function Call |
|------------------|--------------------------------------|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
## Fine-tuned Models
Fine tuned models on vertex have a numerical model/endpoint id.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = completion(
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Add Vertex Credentials to your env
```bash
!gcloud auth application-default login
```
2. Setup config.yaml
```yaml
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
model_info:
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
```
3. Test it!
```bash
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>
## Gemini Pro Vision ## Gemini Pro Vision
| Model Name | Function Call | | Model Name | Function Call |
@ -1684,23 +1720,25 @@ assert isinstance(
``` ```
## Usage - PDF / Videos / etc. Files ## Usage - PDF / Videos / Audio etc. Files
Pass any file supported by Vertex AI, through LiteLLM. Pass any file supported by Vertex AI, through LiteLLM.
LiteLLM Supports the following image types passed in url LiteLLM Supports the following file types passed in url.
Using the `file` message type for VertexAI is live from v1.65.1+
``` ```
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg Files with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg Files with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4 Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
Base64 Encoded Local Images Base64 Encoded Local Files
``` ```
<Tabs> <Tabs>
<TabItem value="sdk" label="SDK"> <TabItem value="sdk" label="SDK">
### **Using `gs://`** ### **Using `gs://` or any URL**
```python ```python
from litellm import completion from litellm import completion
@ -1712,8 +1750,11 @@ response = completion(
"content": [ "content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."}, {"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{ {
"type": "image_url", "type": "file",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF "file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL - specify mime-type
}
}, },
], ],
} }
@ -1747,8 +1788,16 @@ response = completion(
"content": [ "content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."}, {"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{ {
"type": "image_url", "type": "file",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF "file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
}
}, },
], ],
} }
@ -1794,8 +1843,11 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document" "text": "You are a very professional document summarization specialist. Please summarize the given document"
}, },
{ {
"type": "image_url", "type": "file",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF "file": {
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
"format": "application/pdf" # OPTIONAL
}
} }
} }
] ]
@ -1822,10 +1874,17 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document" "text": "You are a very professional document summarization specialist. Please summarize the given document"
}, },
{ {
"type": "image_url", "type": "file",
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF "file": {
} "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
},
},
{
"type": "audio_input",
"audio_input {
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
} }
},
] ]
} }
], ],
@ -1836,6 +1895,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem> </TabItem>
</Tabs> </Tabs>
## Chat Models ## Chat Models
| Model Name | Function Call | | Model Name | Function Call |
|------------------|--------------------------------------| |------------------|--------------------------------------|
@ -2044,7 +2104,12 @@ print(response)
## **Multi-Modal Embeddings** ## **Multi-Modal Embeddings**
Usage
Known Limitations:
- Only supports 1 image / video / image per request
- Only supports GCS or base64 encoded images / videos
### Usage
<Tabs> <Tabs>
<TabItem value="sdk" label="SDK"> <TabItem value="sdk" label="SDK">
@ -2260,6 +2325,115 @@ print(f"Text Embedding: {embeddings.text_embedding}")
</Tabs> </Tabs>
### Text + Image + Video Embeddings
<Tabs>
<TabItem value="sdk" label="SDK">
Text + Image
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"] # will be sent as a gcs image
)
```
Text + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
Image + Video
```python
response = await litellm.aembedding(
model="vertex_ai/multimodalembedding@001",
input=["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM PROXY (Unified Endpoint)">
1. Add model to config.yaml
```yaml
model_list:
- model_name: multimodalembedding@001
litellm_params:
model: vertex_ai/multimodalembedding@001
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
litellm_settings:
drop_params: True
```
2. Start Proxy
```
$ litellm --config /path/to/config.yaml
```
3. Make Request use OpenAI Python SDK, Langchain Python SDK
Text + Image
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"],
)
print(response)
```
Text + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
Image + Video
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model`
response = client.embeddings.create(
model="multimodalembedding@001",
input = ["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"],
)
print(response)
```
</TabItem>
</Tabs>
## **Image Generation Models** ## **Image Generation Models**
Usage Usage

View file

@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
- Login: `<your-proxy-base-url>/sso/key/generate` - Login: `<your-proxy-base-url>/sso/key/generate`
- Logout: `<your-proxy-base-url>` - Logout: `<your-proxy-base-url>`
Here's the env var to set the logout url on the proxy
```bash
PROXY_LOGOUT_URL="https://www.google.com"
```
#### Step 3. Set `PROXY_BASE_URL` in your .env #### Step 3. Set `PROXY_BASE_URL` in your .env
Set this in your .env (so the proxy can set the correct redirect url) Set this in your .env (so the proxy can set the correct redirect url)

View file

@ -160,7 +160,7 @@ general_settings:
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) | | database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) | | database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) | | database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key | | allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) | | custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
| max_parallel_requests | integer | The max parallel requests allowed per deployment | | max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall | | global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
@ -479,7 +479,7 @@ router_settings:
| PROXY_ADMIN_ID | Admin identifier for proxy server | PROXY_ADMIN_ID | Admin identifier for proxy server
| PROXY_BASE_URL | Base URL for proxy service | PROXY_BASE_URL | Base URL for proxy service
| PROXY_LOGOUT_URL | URL for logging out of the proxy service | PROXY_LOGOUT_URL | URL for logging out of the proxy service
| PROXY_MASTER_KEY | Master key for proxy authentication | LITELLM_MASTER_KEY | Master key for proxy authentication
| QDRANT_API_BASE | Base URL for Qdrant API | QDRANT_API_BASE | Base URL for Qdrant API
| QDRANT_API_KEY | API key for Qdrant service | QDRANT_API_KEY | API key for Qdrant service
| QDRANT_URL | Connection URL for Qdrant database | QDRANT_URL | Connection URL for Qdrant database
@ -515,4 +515,5 @@ router_settings:
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse | UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication | UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption | USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
| USE_PRISMA_MIGRATE | Flag to use prisma migrate instead of prisma db push. Recommended for production environments.
| WEBHOOK_URL | URL for receiving webhooks from external services | WEBHOOK_URL | URL for receiving webhooks from external services

View file

@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability ## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability. When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
**WARNING: Only do this if you're running LiteLLM on VPC, that cannot be accessed from the public internet.** **WARNING: Only do this if you're running LiteLLM on VPC, that cannot be accessed from the public internet.**
```yaml #### Configuration
```yaml showLineNumbers title="litellm config.yaml"
general_settings: general_settings:
allow_requests_on_db_unavailable: True allow_requests_on_db_unavailable: True
``` ```
#### Expected Behavior
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM handles errors as follows (a quick verification sketch follows the table):
| Type of Error | Expected Behavior | Details |
|---------------|-------------------|----------------|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
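To sanity-check this behavior in your own deployment, you can poll the readiness endpoint while the database is down. A minimal sketch, assuming the proxy is reachable at `http://localhost:4000` (adjust the base URL for your setup):

```python
import httpx

# Assumed local proxy URL - replace with your deployment's base URL
PROXY_BASE_URL = "http://localhost:4000"

# With allow_requests_on_db_unavailable: True, this should return 200 OK
# even while the database is unreachable.
resp = httpx.get(f"{PROXY_BASE_URL}/health/readiness", timeout=10)
print(resp.status_code, resp.text)
```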
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI ## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
By default, LiteLLM writes several types of logs to the database: By default, LiteLLM writes several types of logs to the database:
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK # INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK # INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
``` ```
### Machine Specifications to Deploy LiteLLM
| Service | Spec | CPUs | Memory | Architecture | Version |
| --- | --- | --- | --- | --- | --- |
| Server | `t2.small` | `1vCPUs` | `8GB` | `x86` | - |
| Redis Cache | - | - | - | - | 7.0+ Redis Engine |
### Reference Kubernetes Deployment YAML
Reference Kubernetes `deployment.yaml` that was load tested by us
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: litellm-deployment
spec:
replicas: 3
selector:
matchLabels:
app: litellm
template:
metadata:
labels:
app: litellm
spec:
containers:
- name: litellm-container
image: ghcr.io/berriai/litellm:main-latest
imagePullPolicy: Always
env:
- name: AZURE_API_KEY
value: "d6******"
- name: AZURE_API_BASE
value: "https://ope******"
- name: LITELLM_MASTER_KEY
value: "sk-1234"
- name: DATABASE_URL
value: "po**********"
args:
- "--config"
- "/app/proxy_config.yaml" # Update the path to mount the config file
volumeMounts: # Define volume mount for proxy_config.yaml
- name: config-volume
mountPath: /app
readOnly: true
livenessProbe:
httpGet:
path: /health/liveliness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
readinessProbe:
httpGet:
path: /health/readiness
port: 4000
initialDelaySeconds: 120
periodSeconds: 15
successThreshold: 1
failureThreshold: 3
timeoutSeconds: 10
volumes: # Define volume to mount proxy_config.yaml
- name: config-volume
configMap:
name: litellm-config
```
Reference Kubernetes `service.yaml` that was load tested by us
```yaml
apiVersion: v1
kind: Service
metadata:
name: litellm-service
spec:
selector:
app: litellm
ports:
- protocol: TCP
port: 4000
targetPort: 4000
type: LoadBalancer
```

View file

@ -188,7 +188,13 @@ Currently implemented for:
- OpenAI (if OPENAI_API_KEY is set) - OpenAI (if OPENAI_API_KEY is set)
- Fireworks AI (if FIREWORKS_AI_API_KEY is set) - Fireworks AI (if FIREWORKS_AI_API_KEY is set)
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set) - LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
- Gemini (if GEMINI_API_KEY is set)
- XAI (if XAI_API_KEY is set)
- Anthropic (if ANTHROPIC_API_KEY is set)
You can also specify a custom provider to check:
**All providers**:
```python ```python
from litellm import get_valid_models from litellm import get_valid_models
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
print(valid_models) print(valid_models)
``` ```
**Specific provider**:
```python
from litellm import get_valid_models
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
print(valid_models)
```
### `validate_environment(model: str)` ### `validate_environment(model: str)`
This helper tells you if you have all the required environment variables for a model, and if not - what's missing. This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
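A minimal sketch of how this can be called (the exact keys in the returned dict may vary by litellm version):

```python
from litellm import validate_environment

# Check whether the required env vars for this model are set
env_check = validate_environment(model="gpt-3.5-turbo")
print(env_check)  # e.g. {'keys_in_environment': False, 'missing_keys': ['OPENAI_API_KEY']}
```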

View file

@ -98,6 +98,5 @@ On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
<Image img={require('../../img/litellm_thinking_openweb.gif')} /> <Image img={require('../../img/litellm_thinking_openweb.gif')} />
## Additional Resources
- Running LiteLLM and OpenWebUI on Windows Localhost: A Comprehensive Guide [https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/](https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/)


View file

@ -24,6 +24,7 @@ This release brings:
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles) - LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
- Perf improvements for Usage-based Routing - Perf improvements for Usage-based Routing
- Streaming guardrail support via websockets - Streaming guardrail support via websockets
- Azure OpenAI client perf fix (from previous release)
## Docker Run LiteLLM Proxy ## Docker Run LiteLLM Proxy
@ -31,7 +32,7 @@ This release brings:
docker run docker run
-e STORE_MODEL_IN_DB=True -e STORE_MODEL_IN_DB=True
-p 4000:4000 -p 4000:4000
ghcr.io/berriai/litellm:main-v1.63.14-stable ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
``` ```
## Demo Instance ## Demo Instance

View file

@ -0,0 +1,160 @@
---
title: v1.65.0-stable - Model Context Protocol
slug: v1.65.0-stable
date: 2025-03-30T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [mcp, custom_prompt_management]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0-stable is live now. Here are the key highlights of this release:
- **MCP Support**: Support for adding and using MCP servers on the LiteLLM proxy.
- **UI view total usage after 1M+ logs**: You can now view usage analytics after crossing 1M+ logs in DB.
## Model Context Protocol (MCP)
This release introduces support for centrally adding MCP servers on LiteLLM. This allows you to add MCP server endpoints and your developers can `list` and `call` MCP tools through LiteLLM.
Read more about MCP [here](https://docs.litellm.ai/docs/mcp).
<Image
img={require('../../img/release_notes/mcp_ui.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
Expose and use MCP servers through LiteLLM
</p>
## UI view total usage after 1M+ logs
This release brings the ability to view total usage analytics even after exceeding 1M+ logs in your database. We've implemented a scalable architecture that stores only aggregate usage data, resulting in significantly more efficient queries and reduced database CPU utilization.
<Image
img={require('../../img/release_notes/ui_usage.png')}
style={{width: '100%', display: 'block', margin: '2rem auto'}}
/>
<p style={{textAlign: 'left', color: '#666'}}>
View total usage after 1M+ logs
</p>
- How this works:
  - We now aggregate usage data into a dedicated DailyUserSpend table, significantly reducing query load and CPU usage even beyond 1M+ logs.
- Daily Spend Breakdown API:
  - Retrieve granular daily usage data (by model, provider, and API key) with a single endpoint.
Example Request:
```shell title="Daily Spend Breakdown API" showLineNumbers
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
-H 'Authorization: Bearer sk-...'
```
```json title="Daily Spend Breakdown API Response" showLineNumbers
{
"results": [
{
"date": "2025-03-27",
"metrics": {
"spend": 0.0177072,
"prompt_tokens": 111,
"completion_tokens": 1711,
"total_tokens": 1822,
"api_requests": 11
},
"breakdown": {
"models": {
"gpt-4o-mini": {
"spend": 1.095e-05,
"prompt_tokens": 37,
"completion_tokens": 9,
"total_tokens": 46,
"api_requests": 1
}
},
"providers": { "openai": { ... }, "azure_ai": { ... } },
"api_keys": { "3126b6eaf1...": { ... } }
}
}
],
"metadata": {
"total_spend": 0.7274667,
"total_prompt_tokens": 280990,
"total_completion_tokens": 376674,
"total_api_requests": 14
}
}
```
## New Models / Updated Models
- Support for Vertex AI gemini-2.0-flash-lite & Google AI Studio gemini-2.0-flash-lite [PR](https://github.com/BerriAI/litellm/pull/9523)
- Support for Vertex AI Fine-Tuned LLMs [PR](https://github.com/BerriAI/litellm/pull/9542)
- Nova Canvas image generation support [PR](https://github.com/BerriAI/litellm/pull/9525)
- OpenAI gpt-4o-transcribe support [PR](https://github.com/BerriAI/litellm/pull/9517)
- Added new Vertex AI text embedding model [PR](https://github.com/BerriAI/litellm/pull/9476)
## LLM Translation
- OpenAI Web Search Tool Call Support [PR](https://github.com/BerriAI/litellm/pull/9465)
- Vertex AI topLogprobs support [PR](https://github.com/BerriAI/litellm/pull/9518)
- Support for sending images and video to Vertex AI multimodal embedding [Doc](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings)
- Support litellm.api_base for Vertex AI + Gemini across completion, embedding, image_generation [PR](https://github.com/BerriAI/litellm/pull/9516)
- Bug fix for returning `response_cost` when using litellm python SDK with LiteLLM Proxy [PR](https://github.com/BerriAI/litellm/commit/6fd18651d129d606182ff4b980e95768fc43ca3d)
- Support for `max_completion_tokens` on Mistral API [PR](https://github.com/BerriAI/litellm/pull/9606)
- Refactored Vertex AI passthrough routes - fixes unpredictable behaviour with auto-setting default_vertex_region on router model add [PR](https://github.com/BerriAI/litellm/pull/9467)
## Spend Tracking Improvements
- Log 'api_base' on spend logs [PR](https://github.com/BerriAI/litellm/pull/9509)
- Support for Gemini audio token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
- Fixed OpenAI audio input token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
## UI
### Model Management
- Allowed team admins to add/update/delete models on UI [PR](https://github.com/BerriAI/litellm/pull/9572)
- Added rendering of `supports_web_search` on the model hub [PR](https://github.com/BerriAI/litellm/pull/9469)
### Request Logs
- Show API base and model ID on request logs [PR](https://github.com/BerriAI/litellm/pull/9572)
- Allow viewing key info on request logs [PR](https://github.com/BerriAI/litellm/pull/9568)
### Usage Tab
- Added Daily User Spend Aggregate view - allows the UI Usage tab to work with > 1M rows [PR](https://github.com/BerriAI/litellm/pull/9538)
- Connected UI to "LiteLLM_DailyUserSpend" spend table [PR](https://github.com/BerriAI/litellm/pull/9603)
## Logging Integrations
- Fixed StandardLoggingPayload for GCS Pub Sub Logging Integration [PR](https://github.com/BerriAI/litellm/pull/9508)
- Track `litellm_model_name` on `StandardLoggingPayload` [Docs](https://docs.litellm.ai/docs/proxy/logging_spec#standardlogginghiddenparams)
## Performance / Reliability Improvements
- LiteLLM Redis semantic caching implementation [PR](https://github.com/BerriAI/litellm/pull/9356)
- Gracefully handle exceptions when DB is having an outage [PR](https://github.com/BerriAI/litellm/pull/9533)
- Allow Pods to startup + passing /health/readiness when allow_requests_on_db_unavailable: True and DB is down [PR](https://github.com/BerriAI/litellm/pull/9569)
## General Improvements
- Support for exposing MCP tools on litellm proxy [PR](https://github.com/BerriAI/litellm/pull/9426)
- Support discovering Gemini, Anthropic, xAI models by calling their /v1/model endpoint [PR](https://github.com/BerriAI/litellm/pull/9530)
- Fixed route check for non-proxy admins on JWT auth [PR](https://github.com/BerriAI/litellm/pull/9454)
- Added baseline Prisma database migrations [PR](https://github.com/BerriAI/litellm/pull/9565)
- View all wildcard models on /model/info [PR](https://github.com/BerriAI/litellm/pull/9572)
## Security
- Bumped next from 14.2.21 to 14.2.25 in UI dashboard [PR](https://github.com/BerriAI/litellm/pull/9458)
## Complete Git Diff
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.14-stable.patch1...v1.65.0-stable)

View file

@ -0,0 +1,34 @@
---
title: v1.65.0 - Team Model Add - update
slug: v1.65.0
date: 2025-03-28T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
tags: [management endpoints, team models, ui]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
This means that only proxy admins or team admins can create team models.
## Additional Changes
- Allows team admins to call `/model/update` to update team models.
- Allows team admins to call `/model/delete` to delete team models.
- Introduces a new `user_models_only` param on `/v2/model/info` - returns only models added by this user (see the sketch below).
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
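For illustration, here is a hedged sketch of querying the new param against a locally running proxy (assumes the proxy is at `http://localhost:4000`, a virtual key of `sk-1234`, and that `user_models_only` is passed as a query parameter):

```python
import httpx

# Assumed local proxy URL and virtual key - adjust for your deployment
BASE_URL = "http://localhost:4000"
API_KEY = "sk-1234"

# Ask /v2/model/info to return only models added by the calling user
resp = httpx.get(
    f"{BASE_URL}/v2/model/info",
    params={"user_models_only": "true"},
    headers={"Authorization": f"Bearer {API_KEY}"},
)
print(resp.json())
```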
<Image img={require('../../img/release_notes/team_model_add.png')} />

View file

@ -304,7 +304,6 @@ const sidebars = {
"image_variations", "image_variations",
] ]
}, },
"mcp",
{ {
type: "category", type: "category",
label: "/audio", label: "/audio",

View file

@ -444,9 +444,7 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
detected_secrets = [] detected_secrets = []
for file in secrets.files: for file in secrets.files:
for found_secret in secrets[file]: for found_secret in secrets[file]:
if found_secret.secret_value is None: if found_secret.secret_value is None:
continue continue
detected_secrets.append( detected_secrets.append(
@ -471,14 +469,12 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return
if "messages" in data and isinstance(data["messages"], list): if "messages" in data and isinstance(data["messages"], list):
for message in data["messages"]: for message in data["messages"]:
if "content" in message and isinstance(message["content"], str): if "content" in message and isinstance(message["content"], str):
detected_secrets = self.scan_message_for_secrets(message["content"]) detected_secrets = self.scan_message_for_secrets(message["content"])
for secret in detected_secrets: for secret in detected_secrets:

View file

@ -0,0 +1,26 @@
Portions of this software are licensed as follows:
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
---
MIT License
Copyright (c) 2023 Berri AI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,21 @@
Additional files for the proxy. Reduces the size of the main litellm package.
Currently, only stores the migration.sql files for litellm-proxy.
To install, run:
```bash
pip install litellm-proxy-extras
```
OR
```bash
pip install litellm[proxy] # installs litellm-proxy-extras and other proxy dependencies.
```
To use the migrations, run:
```bash
litellm --use_prisma_migrate
```


View file

@ -0,0 +1,12 @@
import logging
# Set up package logger
logger = logging.getLogger("litellm_proxy_extras")
if not logger.handlers: # Only add handler if none exists
handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

View file

@ -0,0 +1,360 @@
-- CreateTable
CREATE TABLE "LiteLLM_BudgetTable" (
"budget_id" TEXT NOT NULL,
"max_budget" DOUBLE PRECISION,
"soft_budget" DOUBLE PRECISION,
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"model_max_budget" JSONB,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_CredentialsTable" (
"credential_id" TEXT NOT NULL,
"credential_name" TEXT NOT NULL,
"credential_values" JSONB NOT NULL,
"credential_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ProxyModelTable" (
"model_id" TEXT NOT NULL,
"model_name" TEXT NOT NULL,
"litellm_params" JSONB NOT NULL,
"model_info" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationTable" (
"organization_id" TEXT NOT NULL,
"organization_alias" TEXT NOT NULL,
"budget_id" TEXT NOT NULL,
"metadata" JSONB NOT NULL DEFAULT '{}',
"models" TEXT[],
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ModelTable" (
"id" SERIAL NOT NULL,
"aliases" JSONB,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamTable" (
"team_id" TEXT NOT NULL,
"team_alias" TEXT,
"organization_id" TEXT,
"admins" TEXT[],
"members" TEXT[],
"members_with_roles" JSONB NOT NULL DEFAULT '{}',
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"models" TEXT[],
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"blocked" BOOLEAN NOT NULL DEFAULT false,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"model_id" INTEGER,
CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserTable" (
"user_id" TEXT NOT NULL,
"user_alias" TEXT,
"team_id" TEXT,
"sso_user_id" TEXT,
"organization_id" TEXT,
"password" TEXT,
"teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
"user_role" TEXT,
"max_budget" DOUBLE PRECISION,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"user_email" TEXT,
"models" TEXT[],
"metadata" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_VerificationToken" (
"token" TEXT NOT NULL,
"key_name" TEXT,
"key_alias" TEXT,
"soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"expires" TIMESTAMP(3),
"models" TEXT[],
"aliases" JSONB NOT NULL DEFAULT '{}',
"config" JSONB NOT NULL DEFAULT '{}',
"user_id" TEXT,
"team_id" TEXT,
"permissions" JSONB NOT NULL DEFAULT '{}',
"max_parallel_requests" INTEGER,
"metadata" JSONB NOT NULL DEFAULT '{}',
"blocked" BOOLEAN,
"tpm_limit" BIGINT,
"rpm_limit" BIGINT,
"max_budget" DOUBLE PRECISION,
"budget_duration" TEXT,
"budget_reset_at" TIMESTAMP(3),
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
"model_spend" JSONB NOT NULL DEFAULT '{}',
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
"budget_id" TEXT,
"organization_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"created_by" TEXT,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_by" TEXT,
CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
);
-- CreateTable
CREATE TABLE "LiteLLM_EndUserTable" (
"user_id" TEXT NOT NULL,
"alias" TEXT,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"allowed_model_region" TEXT,
"default_model" TEXT,
"budget_id" TEXT,
"blocked" BOOLEAN NOT NULL DEFAULT false,
CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_Config" (
"param_name" TEXT NOT NULL,
"param_value" JSONB,
CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
);
-- CreateTable
CREATE TABLE "LiteLLM_SpendLogs" (
"request_id" TEXT NOT NULL,
"call_type" TEXT NOT NULL,
"api_key" TEXT NOT NULL DEFAULT '',
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"total_tokens" INTEGER NOT NULL DEFAULT 0,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"completionStartTime" TIMESTAMP(3),
"model" TEXT NOT NULL DEFAULT '',
"model_id" TEXT DEFAULT '',
"model_group" TEXT DEFAULT '',
"custom_llm_provider" TEXT DEFAULT '',
"api_base" TEXT DEFAULT '',
"user" TEXT DEFAULT '',
"metadata" JSONB DEFAULT '{}',
"cache_hit" TEXT DEFAULT '',
"cache_key" TEXT DEFAULT '',
"request_tags" JSONB DEFAULT '[]',
"team_id" TEXT,
"end_user" TEXT,
"requester_ip_address" TEXT,
"messages" JSONB DEFAULT '{}',
"response" JSONB DEFAULT '{}',
CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_ErrorLogs" (
"request_id" TEXT NOT NULL,
"startTime" TIMESTAMP(3) NOT NULL,
"endTime" TIMESTAMP(3) NOT NULL,
"api_base" TEXT NOT NULL DEFAULT '',
"model_group" TEXT NOT NULL DEFAULT '',
"litellm_model_name" TEXT NOT NULL DEFAULT '',
"model_id" TEXT NOT NULL DEFAULT '',
"request_kwargs" JSONB NOT NULL DEFAULT '{}',
"exception_type" TEXT NOT NULL DEFAULT '',
"exception_string" TEXT NOT NULL DEFAULT '',
"status_code" TEXT NOT NULL DEFAULT '',
CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_UserNotifications" (
"request_id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"models" TEXT[],
"justification" TEXT NOT NULL,
"status" TEXT NOT NULL,
CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_TeamMembership" (
"user_id" TEXT NOT NULL,
"team_id" TEXT NOT NULL,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"budget_id" TEXT,
CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_OrganizationMembership" (
"user_id" TEXT NOT NULL,
"organization_id" TEXT NOT NULL,
"user_role" TEXT,
"spend" DOUBLE PRECISION DEFAULT 0.0,
"budget_id" TEXT,
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
);
-- CreateTable
CREATE TABLE "LiteLLM_InvitationLink" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"is_accepted" BOOLEAN NOT NULL DEFAULT false,
"accepted_at" TIMESTAMP(3),
"expires_at" TIMESTAMP(3) NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL,
"created_by" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL,
"updated_by" TEXT NOT NULL,
CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "LiteLLM_AuditLog" (
"id" TEXT NOT NULL,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"changed_by" TEXT NOT NULL DEFAULT '',
"changed_by_api_key" TEXT NOT NULL DEFAULT '',
"action" TEXT NOT NULL,
"table_name" TEXT NOT NULL,
"object_id" TEXT NOT NULL,
"before_value" JSONB,
"updated_values" JSONB,
CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");
-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

View file

@ -0,0 +1,33 @@
-- CreateTable
CREATE TABLE "LiteLLM_DailyUserSpend" (
"id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"date" TEXT NOT NULL,
"api_key" TEXT NOT NULL,
"model" TEXT NOT NULL,
"model_group" TEXT,
"custom_llm_provider" TEXT,
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");
-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");
-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");

View file

@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;

View file

@ -0,0 +1,14 @@
-- CreateEnum
CREATE TYPE "JobStatus" AS ENUM ('ACTIVE', 'INACTIVE');
-- CreateTable
CREATE TABLE "LiteLLM_CronJob" (
"cronjob_id" TEXT NOT NULL,
"pod_id" TEXT NOT NULL,
"status" "JobStatus" NOT NULL DEFAULT 'INACTIVE',
"last_updated" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"ttl" TIMESTAMP(3) NOT NULL,
CONSTRAINT "LiteLLM_CronJob_pkey" PRIMARY KEY ("cronjob_id")
);

View file

@ -0,0 +1 @@
provider = "postgresql"

View file

@ -0,0 +1,80 @@
import os
import random
import subprocess
import time
from typing import Optional
from litellm_proxy_extras._logging import logger
def str_to_bool(value: Optional[str]) -> bool:
if value is None:
return False
return value.lower() in ("true", "1", "t", "y", "yes")
class ProxyExtrasDBManager:
@staticmethod
def setup_database(schema_path: str, use_migrate: bool = False) -> bool:
"""
Set up the database using either prisma migrate or prisma db push
Uses migrations from litellm-proxy-extras package
Args:
schema_path (str): Path to the Prisma schema file
use_migrate (bool): Whether to use prisma migrate instead of db push
Returns:
bool: True if setup was successful, False otherwise
"""
use_migrate = str_to_bool(os.getenv("USE_PRISMA_MIGRATE")) or use_migrate
for attempt in range(4):
original_dir = os.getcwd()
schema_dir = os.path.dirname(schema_path)
os.chdir(schema_dir)
try:
if use_migrate:
logger.info("Running prisma migrate deploy")
try:
# Set migrations directory for Prisma
subprocess.run(
["prisma", "migrate", "deploy"],
timeout=60,
check=True,
capture_output=True,
text=True,
)
logger.info("prisma migrate deploy completed")
return True
except subprocess.CalledProcessError as e:
logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
if (
"P3005" in e.stderr
and "database schema is not empty" in e.stderr
):
logger.info("Error: Database schema is not empty")
return False
else:
# Use prisma db push with increased timeout
subprocess.run(
["prisma", "db", "push", "--accept-data-loss"],
timeout=60,
check=True,
)
return True
except subprocess.TimeoutExpired:
logger.info(f"Attempt {attempt + 1} timed out")
time.sleep(random.randrange(5, 15))
except subprocess.CalledProcessError as e:
attempts_left = 3 - attempt
retry_msg = (
f" Retrying... ({attempts_left} attempts left)"
if attempts_left > 0
else ""
)
logger.info(f"The process failed to execute. Details: {e}.{retry_msg}")
time.sleep(random.randrange(5, 15))
finally:
os.chdir(original_dir)
return False

View file

@ -0,0 +1,30 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.1.1"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
[tool.poetry.urls]
homepage = "https://litellm.ai"
Homepage = "https://litellm.ai"
repository = "https://github.com/BerriAI/litellm"
Repository = "https://github.com/BerriAI/litellm"
documentation = "https://docs.litellm.ai"
Documentation = "https://docs.litellm.ai"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0, !=3.9.7"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.1.1"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",
"../pyproject.toml:litellm-proxy-extras = {version = \""
]

View file

View file

@ -2,7 +2,7 @@
import warnings import warnings
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*") warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
### INIT VARIABLES ########## ### INIT VARIABLES ###########
import threading import threading
import os import os
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
@ -122,19 +122,19 @@ langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False prometheus_initialize_budget_metrics: Optional[bool] = False
argilla_batch_size: Optional[int] = None argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = ( gcs_pub_sub_use_v1: Optional[
False # if you want to use v1 gcs pubsub logged payload bool
) ] = False # if you want to use v1 gcs pubsub logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Union[str, Callable, CustomLogger]] = ( _async_input_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable, CustomLogger]] = ( _async_success_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = ( _async_failure_callback: List[
[] Union[str, Callable, CustomLogger]
) # internal variable - async custom callbacks are routed here. ] = [] # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = [] pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = [] post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False turn_off_message_logging: Optional[bool] = False
@ -142,18 +142,18 @@ log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[bool] = ( add_user_information_to_llm_headers: Optional[
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers bool
) ] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks ############# ### end of callbacks #############
email: Optional[str] = ( email: Optional[
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 str
) ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[str] = ( token: Optional[
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 str
) ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True telemetry = True
max_tokens = 256 # OpenAI Defaults max_tokens = 256 # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False)) drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
@ -229,24 +229,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = ( enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k' False # feature-flag for caching on optional params - e.g. 'top_k'
) )
caching: bool = ( caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
) cache: Optional[
caching_with_models: bool = ( Cache
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 ] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
cache: Optional[Cache] = (
None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
default_in_memory_ttl: Optional[float] = None default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {} model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {} model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0 # set the max budget across all providers max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = ( budget_duration: Optional[
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). str
) ] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = ( default_soft_budget: float = (
50.0 # by default all litellm proxy keys have a soft budget of 50.0 50.0 # by default all litellm proxy keys have a soft budget of 50.0
) )
@ -255,15 +251,11 @@ forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0 # private variable, used if max budget is set _current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {} error_logs: Dict = {}
add_function_to_prompt: bool = ( add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
False # if function calling not supported by api, append function call details to system prompt
)
client_session: Optional[httpx.Client] = None client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks' model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = ( model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
)
suppress_debug_info = False suppress_debug_info = False
dynamodb_table_name: Optional[str] = None dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None s3_callback_params: Optional[Dict] = None
@ -285,9 +277,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = [] custom_prometheus_metadata_labels: List[str] = []
#### REQUEST PRIORITIZATION #### #### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None priority_reservation: Optional[Dict[str, float]] = None
force_ipv4: bool = ( force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
module_level_aclient = AsyncHTTPHandler( module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient" timeout=request_timeout, client_alias="module level aclient"
) )
@ -301,13 +291,13 @@ fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3 allowed_fails: int = 3
num_retries_per_request: Optional[int] = ( num_retries_per_request: Optional[
None # for the request overall (incl. fallbacks + model retries) int
) ] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS ##################### ####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = ( secret_manager_client: Optional[
None # list of instantiated key management clients - e.g. azure kv, infisical, etc. Any
) ] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None _google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None _key_management_system: Optional[KeyManagementSystem] = None
_key_management_settings: KeyManagementSettings = KeyManagementSettings() _key_management_settings: KeyManagementSettings = KeyManagementSettings()
@ -813,6 +803,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
from .llms.maritalk import MaritalkConfig from .llms.maritalk import MaritalkConfig
from .llms.openrouter.chat.transformation import OpenrouterConfig from .llms.openrouter.chat.transformation import OpenrouterConfig
from .llms.anthropic.chat.transformation import AnthropicConfig from .llms.anthropic.chat.transformation import AnthropicConfig
from .llms.anthropic.common_utils import AnthropicModelInfo
from .llms.groq.stt.transformation import GroqSTTConfig from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion.transformation import AnthropicTextConfig from .llms.anthropic.completion.transformation import AnthropicTextConfig
from .llms.triton.completion.transformation import TritonConfig from .llms.triton.completion.transformation import TritonConfig
@ -848,6 +839,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig, VertexGeminiConfig,
VertexGeminiConfig as VertexAIConfig, VertexGeminiConfig as VertexAIConfig,
) )
from .llms.gemini.common_utils import GeminiModelInfo
from .llms.gemini.chat.transformation import ( from .llms.gemini.chat.transformation import (
GoogleAIStudioGeminiConfig, GoogleAIStudioGeminiConfig,
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
@ -950,6 +942,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
from .llms.openai.chat.gpt_transformation import ( from .llms.openai.chat.gpt_transformation import (
OpenAIGPTConfig, OpenAIGPTConfig,
) )
from .llms.openai.transcriptions.whisper_transformation import (
OpenAIWhisperAudioTranscriptionConfig,
)
from .llms.openai.transcriptions.gpt_transformation import (
OpenAIGPTAudioTranscriptionConfig,
)
openAIGPTConfig = OpenAIGPTConfig() openAIGPTConfig = OpenAIGPTConfig()
from .llms.openai.chat.gpt_audio_transformation import ( from .llms.openai.chat.gpt_audio_transformation import (
@ -978,6 +976,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
from .llms.friendliai.chat.transformation import FriendliaiChatConfig from .llms.friendliai.chat.transformation import FriendliaiChatConfig
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
from .llms.xai.chat.transformation import XAIChatConfig from .llms.xai.chat.transformation import XAIChatConfig
from .llms.xai.common_utils import XAIModelInfo
from .llms.volcengine import VolcEngineConfig from .llms.volcengine import VolcEngineConfig
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
from .llms.azure.azure import ( from .llms.azure.azure import (
@ -1047,10 +1046,10 @@ from .types.llms.custom_llm import CustomLLMItem
from .types.utils import GenericStreamingChunk from .types.utils import GenericStreamingChunk
custom_provider_map: List[CustomLLMItem] = [] custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = ( _custom_providers: List[
[] str
) # internal helper util, used to track names of custom providers ] = [] # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[bool] = ( disable_hf_tokenizer_download: Optional[
None # disable huggingface tokenizer download. Defaults to openai clk100 bool
) ] = None # disable huggingface tokenizer download. Defaults to openai clk100
global_disable_no_log_param: bool = False global_disable_no_log_param: bool = False

View file

@ -1,6 +1,7 @@
import json import json
import logging import logging
import os import os
import sys
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter
@ -40,9 +41,56 @@ class JsonFormatter(Formatter):
return json.dumps(json_record) return json.dumps(json_record)
# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
# Create a handler with JSON formatting for exceptions
error_handler = logging.StreamHandler()
error_handler.setFormatter(formatter)
# Setup excepthook for uncaught exceptions
def json_excepthook(exc_type, exc_value, exc_traceback):
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exc_value),
args=(),
exc_info=(exc_type, exc_value, exc_traceback),
)
error_handler.handle(record)
sys.excepthook = json_excepthook
# Configure asyncio exception handler if possible
try:
import asyncio
def async_json_exception_handler(loop, context):
exception = context.get("exception")
if exception:
record = logging.LogRecord(
name="LiteLLM",
level=logging.ERROR,
pathname="",
lineno=0,
msg=str(exception),
args=(),
exc_info=None,
)
error_handler.handle(record)
else:
loop.default_exception_handler(context)
asyncio.get_event_loop().set_exception_handler(async_json_exception_handler)
except Exception:
pass
# Create a formatter and set it for the handler # Create a formatter and set it for the handler
if json_logs: if json_logs:
handler.setFormatter(JsonFormatter()) handler.setFormatter(JsonFormatter())
_setup_json_exception_handlers(JsonFormatter())
else: else:
formatter = logging.Formatter( formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s", "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
@ -65,18 +113,24 @@ def _turn_on_json():
handler = logging.StreamHandler() handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter()) handler.setFormatter(JsonFormatter())
# Define a list of the loggers to update # Define all loggers to update, including root logger
loggers = [verbose_router_logger, verbose_proxy_logger, verbose_logger] loggers = [logging.getLogger()] + [
verbose_router_logger,
verbose_proxy_logger,
verbose_logger,
]
# Iterate through each logger and update its handlers # Iterate through each logger and update its handlers
for logger in loggers: for logger in loggers:
# Remove all existing handlers # Remove all existing handlers
for h in logger.handlers[:]: for h in logger.handlers[:]:
logger.removeHandler(h) logger.removeHandler(h)
# Add the new handler # Add the new handler
logger.addHandler(handler) logger.addHandler(handler)
# Set up exception handlers
_setup_json_exception_handlers(JsonFormatter())
def _turn_on_debug(): def _turn_on_debug():
verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug
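A minimal, self-contained sketch of the pattern `_setup_json_exception_handlers` applies above, assuming a formatter similar to `JsonFormatter`: uncaught exceptions are routed through a JSON-formatting handler instead of the default traceback printer.

import logging
import sys

# Hypothetical stand-in for the JsonFormatter shown above.
formatter = logging.Formatter('{"level": "%(levelname)s", "message": "%(message)s"}')

error_handler = logging.StreamHandler()
error_handler.setFormatter(formatter)

def json_excepthook(exc_type, exc_value, exc_traceback):
    # Wrap the uncaught exception in a LogRecord so it is emitted through the JSON handler.
    record = logging.LogRecord(
        name="LiteLLM",
        level=logging.ERROR,
        pathname="",
        lineno=0,
        msg=str(exc_value),
        args=(),
        exc_info=(exc_type, exc_value, exc_traceback),
    )
    error_handler.handle(record)

sys.excepthook = json_excepthook

raise RuntimeError("boom")  # now reported via the JSON handler instead of the bare default traceback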


@ -202,6 +202,7 @@ def init_redis_cluster(redis_kwargs) -> redis.RedisCluster:
def _init_redis_sentinel(redis_kwargs) -> redis.Redis: def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
sentinel_nodes = redis_kwargs.get("sentinel_nodes") sentinel_nodes = redis_kwargs.get("sentinel_nodes")
sentinel_password = redis_kwargs.get("sentinel_password")
service_name = redis_kwargs.get("service_name") service_name = redis_kwargs.get("service_name")
if not sentinel_nodes or not service_name: if not sentinel_nodes or not service_name:
@ -212,7 +213,11 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.") verbose_logger.debug("init_redis_sentinel: sentinel nodes are being initialized.")
# Set up the Sentinel client # Set up the Sentinel client
sentinel = redis.Sentinel(sentinel_nodes, socket_timeout=0.1) sentinel = redis.Sentinel(
sentinel_nodes,
socket_timeout=0.1,
password=sentinel_password,
)
# Return the master instance for the given service # Return the master instance for the given service
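For context, a hedged usage sketch of the new `sentinel_password` path (host names and service name are hypothetical): redis-py accepts `password` on the `Sentinel` constructor as a default connection option for the clients it hands back.

import redis

sentinel = redis.Sentinel(
    [("sentinel-1.example.internal", 26379), ("sentinel-2.example.internal", 26379)],  # assumed nodes
    socket_timeout=0.1,
    password="s3cr3t",  # forwarded as a default connection option, per the change above
)
master = sentinel.master_for("mymaster", socket_timeout=0.1)  # service_name from redis_kwargs
master.ping()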


@ -15,7 +15,7 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
OTELClass = OpenTelemetry OTELClass = OpenTelemetry
else: else:
Span = Any Span = Any
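The `Span = Union[_Span, Any]` pattern (repeated in several files below) keeps opentelemetry an optional import: under a static type checker `Span` resolves to the union, while at runtime it is plain `Any`. A small sketch of the same guard, independent of this codebase:

from typing import TYPE_CHECKING, Any, Optional, Union

if TYPE_CHECKING:
    # Only evaluated by type checkers; opentelemetry need not be installed at runtime.
    from opentelemetry.trace import Span as _Span

    Span = Union[_Span, Any]
else:
    Span = Any

def record_duration(duration: float, parent_otel_span: Optional[Span] = None) -> None:
    # Works whether or not an OpenTelemetry span is supplied.
    ...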


@ -153,7 +153,6 @@ def create_batch(
) )
api_base: Optional[str] = None api_base: Optional[str] = None
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -358,7 +357,6 @@ def retrieve_batch(
_is_async = kwargs.pop("aretrieve_batch", False) is True _is_async = kwargs.pop("aretrieve_batch", False) is True
api_base: Optional[str] = None api_base: Optional[str] = None
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base


@ -9,12 +9,12 @@ Has 4 methods:
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -66,9 +66,7 @@ class CachingHandlerResponse(BaseModel):
cached_result: Optional[Any] = None cached_result: Optional[Any] = None
final_embedding_cached_response: Optional[EmbeddingResponse] = None final_embedding_cached_response: Optional[EmbeddingResponse] = None
embedding_all_elements_cache_hit: bool = ( embedding_all_elements_cache_hit: bool = False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
)
class LLMCachingHandler: class LLMCachingHandler:
@ -738,7 +736,6 @@ class LLMCachingHandler:
if self._should_store_result_in_cache( if self._should_store_result_in_cache(
original_function=self.original_function, kwargs=new_kwargs original_function=self.original_function, kwargs=new_kwargs
): ):
litellm.cache.add_cache(result, **new_kwargs) litellm.cache.add_cache(result, **new_kwargs)
return return
@ -865,9 +862,9 @@ class LLMCachingHandler:
} }
if litellm.cache is not None: if litellm.cache is not None:
litellm_params["preset_cache_key"] = ( litellm_params[
litellm.cache._get_preset_cache_key_from_kwargs(**kwargs) "preset_cache_key"
) ] = litellm.cache._get_preset_cache_key_from_kwargs(**kwargs)
else: else:
litellm_params["preset_cache_key"] = None litellm_params["preset_cache_key"] = None


@ -1,12 +1,12 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional from typing import TYPE_CHECKING, Any, List, Optional, Union
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
@ -24,7 +24,7 @@ from .redis_cache import RedisCache
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -8,7 +8,6 @@ from .in_memory_cache import InMemoryCache
class LLMClientCache(InMemoryCache): class LLMClientCache(InMemoryCache):
def update_cache_key_with_event_loop(self, key): def update_cache_key_with_event_loop(self, key):
""" """
Add the event loop to the cache key, to prevent event loop closed errors. Add the event loop to the cache key, to prevent event loop closed errors.


@ -34,7 +34,7 @@ if TYPE_CHECKING:
cluster_pipeline = ClusterPipeline cluster_pipeline = ClusterPipeline
async_redis_client = Redis async_redis_client = Redis
async_redis_cluster_client = RedisCluster async_redis_cluster_client = RedisCluster
Span = _Span Span = Union[_Span, Any]
else: else:
pipeline = Any pipeline = Any
cluster_pipeline = Any cluster_pipeline = Any
@ -57,7 +57,6 @@ class RedisCache(BaseCache):
socket_timeout: Optional[float] = 5.0, # default 5 second timeout socket_timeout: Optional[float] = 5.0, # default 5 second timeout
**kwargs, **kwargs,
): ):
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging
from .._redis import get_redis_client, get_redis_connection_pool from .._redis import get_redis_client, get_redis_connection_pool
@ -1045,3 +1044,109 @@ class RedisCache(BaseCache):
except Exception as e: except Exception as e:
verbose_logger.debug(f"Redis TTL Error: {e}") verbose_logger.debug(f"Redis TTL Error: {e}")
return None return None
async def async_rpush(
self,
key: str,
values: List[Any],
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> int:
"""
Append one or multiple values to a list stored at key
Args:
key: The Redis key of the list
values: One or more values to append to the list
parent_otel_span: Optional parent OpenTelemetry span
Returns:
int: The length of the list after the push operation
"""
_redis_client: Any = self.init_async_client()
start_time = time.time()
try:
response = await _redis_client.rpush(key, *values)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_rpush",
)
)
return response
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_rpush",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache RPUSH: - Got exception from REDIS : {str(e)}"
)
raise e
async def async_lpop(
self,
key: str,
count: Optional[int] = None,
parent_otel_span: Optional[Span] = None,
**kwargs,
) -> Union[Any, List[Any]]:
_redis_client: Any = self.init_async_client()
start_time = time.time()
print_verbose(f"LPOP from Redis list: key: {key}, count: {count}")
try:
result = await _redis_client.lpop(key, count)
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_lpop",
)
)
# Handle result parsing if needed
if isinstance(result, bytes):
try:
return result.decode("utf-8")
except Exception:
return result
elif isinstance(result, list) and all(
isinstance(item, bytes) for item in result
):
try:
return [item.decode("utf-8") for item in result]
except Exception:
return result
return result
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_lpop",
)
)
verbose_logger.error(
f"LiteLLM Redis Cache LPOP: - Got exception from REDIS : {str(e)}"
)
raise e
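A hedged usage sketch of the new list helpers; only `async_rpush` and `async_lpop` come from the code above, the cache constructor arguments and key name are assumptions. This is the shape of a simple buffered queue: producers append JSON payloads, a consumer drains up to N items at a time.

import asyncio
import json

from litellm.caching.redis_cache import RedisCache

async def main():
    cache = RedisCache(host="localhost", port=6379)  # assumed connection kwargs

    # Producer: append a spend-update payload to a Redis list.
    await cache.async_rpush(
        key="litellm_spend_update_buffer",
        values=[json.dumps({"key": "sk-123", "spend": 0.42})],
    )

    # Consumer: pop up to 100 buffered payloads in one call.
    items = await cache.async_lpop(key="litellm_spend_update_buffer", count=100)
    print(items)

asyncio.run(main())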


@ -5,7 +5,7 @@ Key differences:
- RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created - RedisClient NEEDs to be re-used across requests, adds 3000ms latency if it's re-created
""" """
from typing import TYPE_CHECKING, Any, List, Optional from typing import TYPE_CHECKING, Any, List, Optional, Union
from litellm.caching.redis_cache import RedisCache from litellm.caching.redis_cache import RedisCache
@ -16,7 +16,7 @@ if TYPE_CHECKING:
pipeline = Pipeline pipeline = Pipeline
async_redis_client = Redis async_redis_client = Redis
Span = _Span Span = Union[_Span, Any]
else: else:
pipeline = Any pipeline = Any
async_redis_client = Any async_redis_client = Any


@ -13,11 +13,15 @@ import ast
import asyncio import asyncio
import json import json
import os import os
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple, cast
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_str_from_messages,
)
from litellm.types.utils import EmbeddingResponse
from .base_cache import BaseCache from .base_cache import BaseCache
@ -87,14 +91,16 @@ class RedisSemanticCache(BaseCache):
if redis_url is None: if redis_url is None:
try: try:
# Attempt to use provided parameters or fallback to environment variables # Attempt to use provided parameters or fallback to environment variables
host = host or os.environ['REDIS_HOST'] host = host or os.environ["REDIS_HOST"]
port = port or os.environ['REDIS_PORT'] port = port or os.environ["REDIS_PORT"]
password = password or os.environ['REDIS_PASSWORD'] password = password or os.environ["REDIS_PASSWORD"]
except KeyError as e: except KeyError as e:
# Raise a more informative exception if any of the required keys are missing # Raise a more informative exception if any of the required keys are missing
missing_var = e.args[0] missing_var = e.args[0]
raise ValueError(f"Missing required Redis configuration: {missing_var}. " raise ValueError(
f"Provide {missing_var} or redis_url.") from e f"Missing required Redis configuration: {missing_var}. "
f"Provide {missing_var} or redis_url."
) from e
redis_url = f"redis://:{password}@{host}:{port}" redis_url = f"redis://:{password}@{host}:{port}"
@ -137,10 +143,13 @@ class RedisSemanticCache(BaseCache):
List[float]: The embedding vector List[float]: The embedding vector
""" """
# Create an embedding from prompt # Create an embedding from prompt
embedding_response = litellm.embedding( embedding_response = cast(
EmbeddingResponse,
litellm.embedding(
model=self.embedding_model, model=self.embedding_model,
input=prompt, input=prompt,
cache={"no-store": True, "no-cache": True}, cache={"no-store": True, "no-cache": True},
),
) )
embedding = embedding_response["data"][0]["embedding"] embedding = embedding_response["data"][0]["embedding"]
return embedding return embedding
@ -186,6 +195,7 @@ class RedisSemanticCache(BaseCache):
""" """
print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}") print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
value_str: Optional[str] = None
try: try:
# Extract the prompt from messages # Extract the prompt from messages
messages = kwargs.get("messages", []) messages = kwargs.get("messages", [])
@ -203,7 +213,9 @@ class RedisSemanticCache(BaseCache):
else: else:
self.llmcache.store(prompt, value_str) self.llmcache.store(prompt, value_str)
except Exception as e: except Exception as e:
print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}") print_verbose(
f"Error setting {value_str or value} in the Redis semantic cache: {str(e)}"
)
def get_cache(self, key: str, **kwargs) -> Any: def get_cache(self, key: str, **kwargs) -> Any:
""" """
@ -336,13 +348,13 @@ class RedisSemanticCache(BaseCache):
prompt, prompt,
value_str, value_str,
vector=prompt_embedding, # Pass through custom embedding vector=prompt_embedding, # Pass through custom embedding
ttl=ttl ttl=ttl,
) )
else: else:
await self.llmcache.astore( await self.llmcache.astore(
prompt, prompt,
value_str, value_str,
vector=prompt_embedding # Pass through custom embedding vector=prompt_embedding, # Pass through custom embedding
) )
except Exception as e: except Exception as e:
print_verbose(f"Error in async_set_cache: {str(e)}") print_verbose(f"Error in async_set_cache: {str(e)}")
@ -374,14 +386,13 @@ class RedisSemanticCache(BaseCache):
prompt_embedding = await self._get_async_embedding(prompt, **kwargs) prompt_embedding = await self._get_async_embedding(prompt, **kwargs)
# Check the cache for semantically similar prompts # Check the cache for semantically similar prompts
results = await self.llmcache.acheck( results = await self.llmcache.acheck(prompt=prompt, vector=prompt_embedding)
prompt=prompt,
vector=prompt_embedding
)
# handle results / cache hit # handle results / cache hit
if not results: if not results:
kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 # TODO why here but not above?? kwargs.setdefault("metadata", {})[
"semantic-similarity"
] = 0.0 # TODO why here but not above??
return None return None
cache_hit = results[0] cache_hit = results[0]
@ -420,7 +431,9 @@ class RedisSemanticCache(BaseCache):
aindex = await self.llmcache._get_async_index() aindex = await self.llmcache._get_async_index()
return await aindex.info() return await aindex.info()
async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None: async def async_set_cache_pipeline(
self, cache_list: List[Tuple[str, Any]], **kwargs
) -> None:
""" """
Asynchronously store multiple values in the semantic cache. Asynchronously store multiple values in the semantic cache.
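The environment-variable fallback near the top of this file can be exercised in isolation; a minimal sketch (values are placeholders) of how `redis_url` is assembled when it is not passed explicitly:

import os

# Placeholder values; in the cache these come from the caller or the environment.
os.environ.setdefault("REDIS_HOST", "localhost")
os.environ.setdefault("REDIS_PORT", "6379")
os.environ.setdefault("REDIS_PASSWORD", "s3cr3t")

try:
    host = os.environ["REDIS_HOST"]
    port = os.environ["REDIS_PORT"]
    password = os.environ["REDIS_PASSWORD"]
except KeyError as e:
    missing_var = e.args[0]
    raise ValueError(
        f"Missing required Redis configuration: {missing_var}. "
        f"Provide {missing_var} or redis_url."
    ) from e

redis_url = f"redis://:{password}@{host}:{port}"
print(redis_url)  # redis://:s3cr3t@localhost:6379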


@ -123,7 +123,7 @@ class S3Cache(BaseCache):
) # Convert string to dictionary ) # Convert string to dictionary
except Exception: except Exception:
cached_response = ast.literal_eval(cached_response) cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict: if not isinstance(cached_response, dict):
cached_response = dict(cached_response) cached_response = dict(cached_response)
verbose_logger.debug( verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}" f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"


@ -4,9 +4,11 @@ ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512 DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5 DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2 DEFAULT_MAX_RETRIES = 2
DEFAULT_MAX_RECURSE_DEPTH = 10
DEFAULT_FAILURE_THRESHOLD_PERCENT = ( DEFAULT_FAILURE_THRESHOLD_PERCENT = (
0.5 # default cooldown a deployment if 50% of requests fail in a given minute 0.5 # default cooldown a deployment if 50% of requests fail in a given minute
) )
DEFAULT_MAX_TOKENS = 4096
DEFAULT_REDIS_SYNC_INTERVAL = 1 DEFAULT_REDIS_SYNC_INTERVAL = 1
DEFAULT_COOLDOWN_TIME_SECONDS = 5 DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5 DEFAULT_REPLICATE_POLLING_RETRIES = 5
@ -16,6 +18,8 @@ DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300 DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic. SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY #### #### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives. REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings #### #### Networking settings ####
@ -414,6 +418,7 @@ RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" # default tool name used when conv
########################### Logging Callback Constants ########################### ########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07" AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
MCP_TOOL_NAME_PREFIX = "mcp_tool"
########################### LiteLLM Proxy Specific Constants ########################### ########################### LiteLLM Proxy Specific Constants ###########################
######################################################################################## ########################################################################################
@ -441,3 +446,7 @@ HEALTH_CHECK_TIMEOUT_SECONDS = 60 # 60 seconds
UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard" UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
LITELLM_PROXY_ADMIN_NAME = "default_user_id" LITELLM_PROXY_ADMIN_NAME = "default_user_id"
########################### DB CRON JOB NAMES ###########################
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute
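As an illustration of how the new DB cron-job constants might be combined, a pod can take a short-lived lock before flushing the spend buffer. This is a generic Redis SET NX EX sketch, not the proxy's actual implementation; the lock key format and pod id are hypothetical.

import redis

DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60

r = redis.Redis(host="localhost", port=6379)  # assumed connection settings

# SET NX EX: only one pod holds the lock for the TTL window.
got_lock = r.set(
    name=f"cronjob_lock:{DB_SPEND_UPDATE_JOB_NAME}",  # hypothetical key format
    value="pod-1",                                    # hypothetical pod identifier
    nx=True,
    ex=DEFAULT_CRON_JOB_LOCK_TTL_SECONDS,
)
if got_lock:
    print("this pod runs the spend-update job")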


@ -2,7 +2,7 @@
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
from functools import lru_cache from functools import lru_cache
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union, cast
from pydantic import BaseModel from pydantic import BaseModel
@ -275,15 +275,13 @@ def cost_per_token( # noqa: PLR0915
custom_llm_provider=custom_llm_provider, custom_llm_provider=custom_llm_provider,
prompt_characters=prompt_characters, prompt_characters=prompt_characters,
completion_characters=completion_characters, completion_characters=completion_characters,
prompt_tokens=prompt_tokens, usage=usage_block,
completion_tokens=completion_tokens,
) )
elif cost_router == "cost_per_token": elif cost_router == "cost_per_token":
return google_cost_per_token( return google_cost_per_token(
model=model_without_prefix, model=model_without_prefix,
custom_llm_provider=custom_llm_provider, custom_llm_provider=custom_llm_provider,
prompt_tokens=prompt_tokens, usage=usage_block,
completion_tokens=completion_tokens,
) )
elif custom_llm_provider == "anthropic": elif custom_llm_provider == "anthropic":
return anthropic_cost_per_token(model=model, usage=usage_block) return anthropic_cost_per_token(model=model, usage=usage_block)
@ -464,13 +462,36 @@ def _model_contains_known_llm_provider(model: str) -> bool:
def _get_usage_object( def _get_usage_object(
completion_response: Any, completion_response: Any,
) -> Optional[Usage]: ) -> Optional[Usage]:
usage_obj: Optional[Usage] = None usage_obj = cast(
if completion_response is not None and isinstance( Union[Usage, ResponseAPIUsage, dict, BaseModel],
completion_response, ModelResponse (
): completion_response.get("usage")
usage_obj = completion_response.get("usage") if isinstance(completion_response, dict)
else getattr(completion_response, "get", lambda x: None)("usage")
),
)
if usage_obj is None:
return None
if isinstance(usage_obj, Usage):
return usage_obj return usage_obj
elif (
usage_obj is not None
and (isinstance(usage_obj, dict) or isinstance(usage_obj, ResponseAPIUsage))
and ResponseAPILoggingUtils._is_response_api_usage(usage_obj)
):
return ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage_obj
)
elif isinstance(usage_obj, dict):
return Usage(**usage_obj)
elif isinstance(usage_obj, BaseModel):
return Usage(**usage_obj.model_dump())
else:
verbose_logger.debug(
f"Unknown usage object type: {type(usage_obj)}, usage_obj: {usage_obj}"
)
return None
def _is_known_usage_objects(usage_obj): def _is_known_usage_objects(usage_obj):
@ -559,7 +580,6 @@ def completion_cost( # noqa: PLR0915
- For un-mapped Replicate models, the cost is calculated based on the total time used for the request. - For un-mapped Replicate models, the cost is calculated based on the total time used for the request.
""" """
try: try:
call_type = _infer_call_type(call_type, completion_response) or "completion" call_type = _infer_call_type(call_type, completion_response) or "completion"
if ( if (
@ -664,6 +684,7 @@ def completion_cost( # noqa: PLR0915
elif len(prompt) > 0: elif len(prompt) > 0:
prompt_tokens = token_counter(model=model, text=prompt) prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion) completion_tokens = token_counter(model=model, text=completion)
if model is None: if model is None:
raise ValueError( raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}" f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
@ -828,11 +849,14 @@ def get_response_cost_from_hidden_params(
_hidden_params_dict = hidden_params _hidden_params_dict = hidden_params
additional_headers = _hidden_params_dict.get("additional_headers", {}) additional_headers = _hidden_params_dict.get("additional_headers", {})
if additional_headers and "x-litellm-response-cost" in additional_headers: if (
response_cost = additional_headers["x-litellm-response-cost"] additional_headers
and "llm_provider-x-litellm-response-cost" in additional_headers
):
response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
if response_cost is None: if response_cost is None:
return None return None
return float(additional_headers["x-litellm-response-cost"]) return float(additional_headers["llm_provider-x-litellm-response-cost"])
return None return None
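A hedged sketch of reading the relocated cost header from a response's hidden params (the surrounding response shape is assumed; the header key comes from the change above):

from typing import Optional

def response_cost_from_hidden_params(hidden_params: dict) -> Optional[float]:
    # Mirrors the lookup above: the provider-forwarded header now carries the "llm_provider-" prefix.
    additional_headers = hidden_params.get("additional_headers", {}) or {}
    raw_cost = additional_headers.get("llm_provider-x-litellm-response-cost")
    return float(raw_cost) if raw_cost is not None else None

# Hypothetical hidden params as attached to a completion response.
print(response_cost_from_hidden_params(
    {"additional_headers": {"llm_provider-x-litellm-response-cost": "0.00042"}}
))  # 0.00042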


@ -1,5 +1,5 @@
import json import json
from typing import List, Literal, Union from typing import Dict, List, Literal, Union
from mcp import ClientSession from mcp import ClientSession
from mcp.types import CallToolRequestParams as MCPCallToolRequestParams from mcp.types import CallToolRequestParams as MCPCallToolRequestParams
@ -76,8 +76,8 @@ def _get_function_arguments(function: FunctionDefinition) -> dict:
return arguments if isinstance(arguments, dict) else {} return arguments if isinstance(arguments, dict) else {}
def _transform_openai_tool_call_to_mcp_tool_call_request( def transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool: ChatCompletionMessageToolCall, openai_tool: Union[ChatCompletionMessageToolCall, Dict],
) -> MCPCallToolRequestParams: ) -> MCPCallToolRequestParams:
"""Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams.""" """Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams."""
function = openai_tool["function"] function = openai_tool["function"]
@ -100,9 +100,11 @@ async def call_openai_tool(
Returns: Returns:
The result of the MCP tool call. The result of the MCP tool call.
""" """
mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request( mcp_tool_call_request_params = (
transform_openai_tool_call_request_to_mcp_tool_call_request(
openai_tool=openai_tool, openai_tool=openai_tool,
) )
)
return await call_mcp_tool( return await call_mcp_tool(
session=session, session=session,
call_tool_request_params=mcp_tool_call_request_params, call_tool_request_params=mcp_tool_call_request_params,
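A usage sketch for the renamed helper; the import path is assumed, since the file name is not shown in this view. With the widened signature it now also accepts a plain dict shaped like an OpenAI tool call.

# Assumed import location for the helper shown above.
from litellm.experimental_mcp_client.tools import (
    transform_openai_tool_call_request_to_mcp_tool_call_request,
)

openai_tool = {
    "id": "call_abc123",
    "type": "function",
    "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
}

mcp_params = transform_openai_tool_call_request_to_mcp_tool_call_request(
    openai_tool=openai_tool,
)
print(mcp_params.name, mcp_params.arguments)  # get_weather {'city': 'Paris'}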


@ -138,7 +138,6 @@ def create_fine_tuning_job(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -360,7 +359,6 @@ def cancel_fine_tuning_job(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base
@ -522,7 +520,6 @@ def list_fine_tuning_jobs(
# OpenAI # OpenAI
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = ( api_base = (
optional_params.api_base optional_params.api_base


@ -19,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:
return squashed return squashed


@ -195,13 +195,16 @@ class SlackAlerting(CustomBatchLogger):
if self.alerting is None or self.alert_types is None: if self.alerting is None or self.alert_types is None:
return return
time_difference_float, model, api_base, messages = ( (
self._response_taking_too_long_callback_helper( time_difference_float,
model,
api_base,
messages,
) = self._response_taking_too_long_callback_helper(
kwargs=kwargs, kwargs=kwargs,
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
) )
)
if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions: if litellm.turn_off_message_logging or litellm.redact_messages_in_exceptions:
messages = "Message not logged. litellm.redact_messages_in_exceptions=True" messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`" request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
@ -819,9 +822,9 @@ class SlackAlerting(CustomBatchLogger):
### UNIQUE CACHE KEY ### ### UNIQUE CACHE KEY ###
cache_key = provider + region_name cache_key = provider + region_name
outage_value: Optional[ProviderRegionOutageModel] = ( outage_value: Optional[
await self.internal_usage_cache.async_get_cache(key=cache_key) ProviderRegionOutageModel
) ] = await self.internal_usage_cache.async_get_cache(key=cache_key)
if ( if (
getattr(exception, "status_code", None) is None getattr(exception, "status_code", None) is None
@ -1402,9 +1405,9 @@ Model Info:
self.alert_to_webhook_url is not None self.alert_to_webhook_url is not None
and alert_type in self.alert_to_webhook_url and alert_type in self.alert_to_webhook_url
): ):
slack_webhook_url: Optional[Union[str, List[str]]] = ( slack_webhook_url: Optional[
self.alert_to_webhook_url[alert_type] Union[str, List[str]]
) ] = self.alert_to_webhook_url[alert_type]
elif self.default_webhook_url is not None: elif self.default_webhook_url is not None:
slack_webhook_url = self.default_webhook_url slack_webhook_url = self.default_webhook_url
else: else:
@ -1768,7 +1771,6 @@ Model Info:
- Team Created, Updated, Deleted - Team Created, Updated, Deleted
""" """
try: try:
message = f"`{event_name}`\n" message = f"`{event_name}`\n"
key_event_dict = key_event.model_dump() key_event_dict = key_event.model_dump()


@ -98,7 +98,6 @@ class ArgillaLogger(CustomBatchLogger):
argilla_dataset_name: Optional[str], argilla_dataset_name: Optional[str],
argilla_base_url: Optional[str], argilla_base_url: Optional[str],
) -> ArgillaCredentialsObject: ) -> ArgillaCredentialsObject:
_credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY") _credentials_api_key = argilla_api_key or os.getenv("ARGILLA_API_KEY")
if _credentials_api_key is None: if _credentials_api_key is None:
raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.") raise Exception("Invalid Argilla API Key given. _credentials_api_key=None.")


@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional, Union
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
@ -7,7 +7,7 @@ from litellm.types.utils import StandardLoggingPayload
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -19,14 +19,13 @@ if TYPE_CHECKING:
from litellm.types.integrations.arize import Protocol as _Protocol from litellm.types.integrations.arize import Protocol as _Protocol
Protocol = _Protocol Protocol = _Protocol
Span = _Span Span = Union[_Span, Any]
else: else:
Protocol = Any Protocol = Any
Span = Any Span = Any
class ArizeLogger(OpenTelemetry): class ArizeLogger(OpenTelemetry):
def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]): def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
ArizeLogger.set_arize_attributes(span, kwargs, response_obj) ArizeLogger.set_arize_attributes(span, kwargs, response_obj)
return return


@ -1,17 +1,20 @@
import os import os
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any, Union
from litellm.integrations.arize import _utils
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.arize import _utils
from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig
if TYPE_CHECKING: if TYPE_CHECKING:
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
from litellm.types.integrations.arize import Protocol as _Protocol
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
from litellm.types.integrations.arize import Protocol as _Protocol
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
Protocol = _Protocol Protocol = _Protocol
OpenTelemetryConfig = _OpenTelemetryConfig OpenTelemetryConfig = _OpenTelemetryConfig
Span = _Span Span = Union[_Span, Any]
else: else:
Protocol = Any Protocol = Any
OpenTelemetryConfig = Any OpenTelemetryConfig = Any
@ -20,6 +23,7 @@ else:
ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces" ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"
class ArizePhoenixLogger: class ArizePhoenixLogger:
@staticmethod @staticmethod
def set_arize_phoenix_attributes(span: Span, kwargs, response_obj): def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
@ -59,15 +63,14 @@ class ArizePhoenixLogger:
# a slightly different auth header format than self hosted phoenix # a slightly different auth header format than self hosted phoenix
if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT: if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
if api_key is None: if api_key is None:
raise ValueError("PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used.") raise ValueError(
"PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
)
otlp_auth_headers = f"api_key={api_key}" otlp_auth_headers = f"api_key={api_key}"
elif api_key is not None: elif api_key is not None:
# api_key/auth is optional for self hosted phoenix # api_key/auth is optional for self hosted phoenix
otlp_auth_headers = f"Authorization=Bearer {api_key}" otlp_auth_headers = f"Authorization=Bearer {api_key}"
return ArizePhoenixConfig( return ArizePhoenixConfig(
otlp_auth_headers=otlp_auth_headers, otlp_auth_headers=otlp_auth_headers, protocol=protocol, endpoint=endpoint
protocol=protocol,
endpoint=endpoint
) )
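In short, the validation above means: against the hosted Phoenix endpoint a PHOENIX_API_KEY is mandatory and is sent as `api_key=...`, while self-hosted deployments may omit it or use a Bearer header. A compact sketch of that branch (endpoint and key values are placeholders, the function is a standalone stand-in, not the logger method itself):

from typing import Optional

ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://app.phoenix.arize.com/v1/traces"

def build_otlp_auth_headers(endpoint: str, api_key: Optional[str]) -> Optional[str]:
    # Hosted Phoenix uses a different auth header format than self-hosted Phoenix.
    if endpoint == ARIZE_HOSTED_PHOENIX_ENDPOINT:
        if api_key is None:
            raise ValueError(
                "PHOENIX_API_KEY must be set when the Arize hosted Phoenix endpoint is used."
            )
        return f"api_key={api_key}"
    if api_key is not None:
        return f"Authorization=Bearer {api_key}"
    return None  # auth is optional for self-hosted Phoenix

print(build_otlp_auth_headers("http://localhost:6006/v1/traces", None))  # None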


@ -12,7 +12,10 @@ class AthinaLogger:
"athina-api-key": self.athina_api_key, "athina-api-key": self.athina_api_key,
"Content-Type": "application/json", "Content-Type": "application/json",
} }
self.athina_logging_url = os.getenv("ATHINA_BASE_URL", "https://log.athina.ai") + "/api/v1/log/inference" self.athina_logging_url = (
os.getenv("ATHINA_BASE_URL", "https://log.athina.ai")
+ "/api/v1/log/inference"
)
self.additional_keys = [ self.additional_keys = [
"environment", "environment",
"prompt_slug", "prompt_slug",

View file

@ -50,12 +50,12 @@ class AzureBlobStorageLogger(CustomBatchLogger):
self.azure_storage_file_system: str = _azure_storage_file_system self.azure_storage_file_system: str = _azure_storage_file_system
# Internal variables used for Token based authentication # Internal variables used for Token based authentication
self.azure_auth_token: Optional[str] = ( self.azure_auth_token: Optional[
None # the Azure AD token to use for Azure Storage API requests str
) ] = None # the Azure AD token to use for Azure Storage API requests
self.token_expiry: Optional[datetime] = ( self.token_expiry: Optional[
None # the expiry time of the currentAzure AD token datetime
) ] = None # the expiry time of the currentAzure AD token
asyncio.create_task(self.periodic_flush()) asyncio.create_task(self.periodic_flush())
self.flush_lock = asyncio.Lock() self.flush_lock = asyncio.Lock()
@ -153,7 +153,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
3. Flush the data 3. Flush the data
""" """
try: try:
if self.azure_storage_account_key: if self.azure_storage_account_key:
await self.upload_to_azure_data_lake_with_azure_account_key( await self.upload_to_azure_data_lake_with_azure_account_key(
payload=payload payload=payload


@ -4,7 +4,7 @@
import copy import copy
import os import os
from datetime import datetime from datetime import datetime
from typing import Optional, Dict from typing import Dict, Optional
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -19,7 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
) )
from litellm.utils import print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback) global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback
)
global_braintrust_sync_http_handler = HTTPHandler() global_braintrust_sync_http_handler = HTTPHandler()
API_BASE = "https://api.braintrustdata.com/v1" API_BASE = "https://api.braintrustdata.com/v1"
@ -35,7 +37,9 @@ def get_utc_datetime():
class BraintrustLogger(CustomLogger): class BraintrustLogger(CustomLogger):
def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None) -> None: def __init__(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> None:
super().__init__() super().__init__()
self.validate_environment(api_key=api_key) self.validate_environment(api_key=api_key)
self.api_base = api_base or API_BASE self.api_base = api_base or API_BASE
@ -45,7 +49,9 @@ class BraintrustLogger(CustomLogger):
"Authorization": "Bearer " + self.api_key, "Authorization": "Bearer " + self.api_key,
"Content-Type": "application/json", "Content-Type": "application/json",
} }
self._project_id_cache: Dict[str, str] = {} # Cache mapping project names to IDs self._project_id_cache: Dict[
str, str
] = {} # Cache mapping project names to IDs
def validate_environment(self, api_key: Optional[str]): def validate_environment(self, api_key: Optional[str]):
""" """
@ -71,7 +77,9 @@ class BraintrustLogger(CustomLogger):
try: try:
response = global_braintrust_sync_http_handler.post( response = global_braintrust_sync_http_handler.post(
f"{self.api_base}/project", headers=self.headers, json={"name": project_name} f"{self.api_base}/project",
headers=self.headers,
json={"name": project_name},
) )
project_dict = response.json() project_dict = response.json()
project_id = project_dict["id"] project_id = project_dict["id"]
@ -89,7 +97,9 @@ class BraintrustLogger(CustomLogger):
try: try:
response = await global_braintrust_http_handler.post( response = await global_braintrust_http_handler.post(
f"{self.api_base}/project/register", headers=self.headers, json={"name": project_name} f"{self.api_base}/project/register",
headers=self.headers,
json={"name": project_name},
) )
project_dict = response.json() project_dict = response.json()
project_id = project_dict["id"] project_id = project_dict["id"]
@ -116,15 +126,21 @@ class BraintrustLogger(CustomLogger):
if metadata is None: if metadata is None:
metadata = {} metadata = {}
proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) or {} proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers: for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("braintrust"): if metadata_param_key.startswith("braintrust"):
trace_param_key = metadata_param_key.replace("braintrust", "", 1) trace_param_key = metadata_param_key.replace("braintrust", "", 1)
if trace_param_key in metadata: if trace_param_key in metadata:
verbose_logger.warning(f"Overwriting Braintrust `{trace_param_key}` from request header") verbose_logger.warning(
f"Overwriting Braintrust `{trace_param_key}` from request header"
)
else: else:
verbose_logger.debug(f"Found Braintrust `{trace_param_key}` in request header") verbose_logger.debug(
f"Found Braintrust `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key) metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata return metadata
@ -157,24 +173,35 @@ class BraintrustLogger(CustomLogger):
output = None output = None
choices = [] choices = []
if response_obj is not None and ( if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse) kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
): ):
output = None output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json() output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"] choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse): elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text output = response_obj.choices[0].text
choices = response_obj.choices choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"] output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {}) litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata) metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {} clean_metadata = {}
try: try:
metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata metadata = copy.deepcopy(
metadata
) # Avoid modifying the original metadata
except Exception: except Exception:
new_metadata = {} new_metadata = {}
for key, value in metadata.items(): for key, value in metadata.items():
@ -192,7 +219,9 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id") project_id = metadata.get("project_id")
if project_id is None: if project_id is None:
project_name = metadata.get("project_name") project_name = metadata.get("project_name")
project_id = self.get_project_id_sync(project_name) if project_name else None project_id = (
self.get_project_id_sync(project_name) if project_name else None
)
if project_id is None: if project_id is None:
if self.default_project_id is None: if self.default_project_id is None:
@ -234,7 +263,8 @@ class BraintrustLogger(CustomLogger):
"completion_tokens": usage_obj.completion_tokens, "completion_tokens": usage_obj.completion_tokens,
"total_tokens": usage_obj.total_tokens, "total_tokens": usage_obj.total_tokens,
"total_cost": cost, "total_cost": cost,
"time_to_first_token": end_time.timestamp() - start_time.timestamp(), "time_to_first_token": end_time.timestamp()
- start_time.timestamp(),
"start": start_time.timestamp(), "start": start_time.timestamp(),
"end": end_time.timestamp(), "end": end_time.timestamp(),
} }
@ -255,7 +285,9 @@ class BraintrustLogger(CustomLogger):
request_data["metrics"] = metrics request_data["metrics"] = metrics
try: try:
print_verbose(f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}") print_verbose(
f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
)
global_braintrust_sync_http_handler.post( global_braintrust_sync_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert", url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]}, json={"events": [request_data]},
@ -276,20 +308,29 @@ class BraintrustLogger(CustomLogger):
output = None output = None
choices = [] choices = []
if response_obj is not None and ( if response_obj is not None and (
kwargs.get("call_type", None) == "embedding" or isinstance(response_obj, litellm.EmbeddingResponse) kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
): ):
output = None output = None
elif response_obj is not None and isinstance(response_obj, litellm.ModelResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output = response_obj["choices"][0]["message"].json() output = response_obj["choices"][0]["message"].json()
choices = response_obj["choices"] choices = response_obj["choices"]
elif response_obj is not None and isinstance(response_obj, litellm.TextCompletionResponse): elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output = response_obj.choices[0].text output = response_obj.choices[0].text
choices = response_obj.choices choices = response_obj.choices
elif response_obj is not None and isinstance(response_obj, litellm.ImageResponse): elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"] output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {}) litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) or {} # if litellm_params['metadata'] == None metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata) metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {} clean_metadata = {}
new_metadata = {} new_metadata = {}
@ -313,7 +354,11 @@ class BraintrustLogger(CustomLogger):
project_id = metadata.get("project_id") project_id = metadata.get("project_id")
if project_id is None: if project_id is None:
project_name = metadata.get("project_name") project_name = metadata.get("project_name")
project_id = await self.get_project_id_async(project_name) if project_name else None project_id = (
await self.get_project_id_async(project_name)
if project_name
else None
)
if project_id is None: if project_id is None:
if self.default_project_id is None: if self.default_project_id is None:
@ -362,8 +407,14 @@ class BraintrustLogger(CustomLogger):
api_call_start_time = kwargs.get("api_call_start_time") api_call_start_time = kwargs.get("api_call_start_time")
completion_start_time = kwargs.get("completion_start_time") completion_start_time = kwargs.get("completion_start_time")
if api_call_start_time is not None and completion_start_time is not None: if (
metrics["time_to_first_token"] = completion_start_time.timestamp() - api_call_start_time.timestamp() api_call_start_time is not None
and completion_start_time is not None
):
metrics["time_to_first_token"] = (
completion_start_time.timestamp()
- api_call_start_time.timestamp()
)
request_data = { request_data = {
"id": litellm_call_id, "id": litellm_call_id,


@ -14,7 +14,6 @@ from litellm.integrations.custom_logger import CustomLogger
class CustomBatchLogger(CustomLogger): class CustomBatchLogger(CustomLogger):
def __init__( def __init__(
self, self,
flush_lock: Optional[asyncio.Lock] = None, flush_lock: Optional[asyncio.Lock] = None,


@ -7,7 +7,6 @@ from litellm.types.utils import StandardLoggingGuardrailInformation
class CustomGuardrail(CustomLogger): class CustomGuardrail(CustomLogger):
def __init__( def __init__(
self, self,
guardrail_name: Optional[str] = None, guardrail_name: Optional[str] = None,


@ -31,7 +31,7 @@ from litellm.types.utils import (
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -233,7 +233,6 @@ class DataDogLogger(
pass pass
async def _log_async_event(self, kwargs, response_obj, start_time, end_time): async def _log_async_event(self, kwargs, response_obj, start_time, end_time):
dd_payload = self.create_datadog_logging_payload( dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs, kwargs=kwargs,
response_obj=response_obj, response_obj=response_obj,


@ -125,9 +125,9 @@ class GCSBucketBase(CustomBatchLogger):
if kwargs is None: if kwargs is None:
kwargs = {} kwargs = {}
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params", None) StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params", None)
bucket_name: str bucket_name: str
path_service_account: Optional[str] path_service_account: Optional[str]


@ -70,13 +70,14 @@ class GcsPubSubLogger(CustomBatchLogger):
"""Construct authorization headers using Vertex AI auth""" """Construct authorization headers using Vertex AI auth"""
from litellm import vertex_chat_completion from litellm import vertex_chat_completion
_auth_header, vertex_project = ( (
await vertex_chat_completion._ensure_access_token_async( _auth_header,
vertex_project,
) = await vertex_chat_completion._ensure_access_token_async(
credentials=self.path_service_account_json, credentials=self.path_service_account_json,
project_id=None, project_id=None,
custom_llm_provider="vertex_ai", custom_llm_provider="vertex_ai",
) )
)
auth_header, _ = vertex_chat_completion._get_token_and_url( auth_header, _ = vertex_chat_completion._get_token_and_url(
model="pub-sub", model="pub-sub",


@ -155,11 +155,7 @@ class HumanloopLogger(CustomLogger):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
humanloop_api_key = dynamic_callback_params.get( humanloop_api_key = dynamic_callback_params.get(
"humanloop_api_key" "humanloop_api_key"
) or get_secret_str("HUMANLOOP_API_KEY") ) or get_secret_str("HUMANLOOP_API_KEY")


@ -471,9 +471,9 @@ class LangFuseLogger:
# we clean out all extra litellm metadata params before logging # we clean out all extra litellm metadata params before logging
clean_metadata: Dict[str, Any] = {} clean_metadata: Dict[str, Any] = {}
if prompt_management_metadata is not None: if prompt_management_metadata is not None:
clean_metadata["prompt_management_metadata"] = ( clean_metadata[
prompt_management_metadata "prompt_management_metadata"
) ] = prompt_management_metadata
if isinstance(metadata, dict): if isinstance(metadata, dict):
for key, value in metadata.items(): for key, value in metadata.items():
# generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy


@ -19,7 +19,6 @@ else:
class LangFuseHandler: class LangFuseHandler:
@staticmethod @staticmethod
def get_langfuse_logger_for_request( def get_langfuse_logger_for_request(
standard_callback_dynamic_params: StandardCallbackDynamicParams, standard_callback_dynamic_params: StandardCallbackDynamicParams,
@ -87,7 +86,9 @@ class LangFuseHandler:
if globalLangfuseLogger is not None: if globalLangfuseLogger is not None:
return globalLangfuseLogger return globalLangfuseLogger
credentials_dict: Dict[str, Any] = ( credentials_dict: Dict[
str, Any
] = (
{} {}
) # the global langfuse logger uses Environment Variables, there are no dynamic credentials ) # the global langfuse logger uses Environment Variables, there are no dynamic credentials
globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache( globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache(


@ -172,11 +172,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
return self.get_chat_completion_prompt( return self.get_chat_completion_prompt(
model, model,
messages, messages,


@ -75,7 +75,6 @@ class LangsmithLogger(CustomBatchLogger):
langsmith_project: Optional[str] = None, langsmith_project: Optional[str] = None,
langsmith_base_url: Optional[str] = None, langsmith_base_url: Optional[str] = None,
) -> LangsmithCredentialsObject: ) -> LangsmithCredentialsObject:
_credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY") _credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
if _credentials_api_key is None: if _credentials_api_key is None:
raise Exception( raise Exception(
@ -443,9 +442,9 @@ class LangsmithLogger(CustomBatchLogger):
Otherwise, use the default credentials. Otherwise, use the default credentials.
""" """
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params", None) StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params", None)
if standard_callback_dynamic_params is not None: if standard_callback_dynamic_params is not None:
credentials = self.get_credentials_from_env( credentials = self.get_credentials_from_env(
langsmith_api_key=standard_callback_dynamic_params.get( langsmith_api_key=standard_callback_dynamic_params.get(
@ -481,7 +480,6 @@ class LangsmithLogger(CustomBatchLogger):
asyncio.run(self.async_send_batch()) asyncio.run(self.async_send_batch())
def get_run_by_id(self, run_id): def get_run_by_id(self, run_id):
langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"] langsmith_api_key = self.default_credentials["LANGSMITH_API_KEY"]
langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"] langsmith_api_base = self.default_credentials["LANGSMITH_BASE_URL"]


@ -1,12 +1,12 @@
import json import json
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any, Union
from litellm.proxy._types import SpanAttributes from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any


@ -20,7 +20,6 @@ def parse_tool_calls(tool_calls):
return None return None
def clean_tool_call(tool_call): def clean_tool_call(tool_call):
serialized = { serialized = {
"type": tool_call.type, "type": tool_call.type,
"id": tool_call.id, "id": tool_call.id,
@ -36,7 +35,6 @@ def parse_tool_calls(tool_calls):
def parse_messages(input): def parse_messages(input):
if input is None: if input is None:
return None return None


@ -48,14 +48,17 @@ class MlflowLogger(CustomLogger):
def _extract_and_set_chat_attributes(self, span, kwargs, response_obj): def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
try: try:
from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools from mlflow.tracing.utils import set_span_chat_messages # type: ignore
from mlflow.tracing.utils import set_span_chat_tools # type: ignore
except ImportError: except ImportError:
return return
inputs = self._construct_input(kwargs) inputs = self._construct_input(kwargs)
input_messages = inputs.get("messages", []) input_messages = inputs.get("messages", [])
output_messages = [c.message.model_dump(exclude_none=True) output_messages = [
for c in getattr(response_obj, "choices", [])] c.message.model_dump(exclude_none=True)
for c in getattr(response_obj, "choices", [])
]
if messages := [*input_messages, *output_messages]: if messages := [*input_messages, *output_messages]:
set_span_chat_messages(span, messages) set_span_chat_messages(span, messages)
if tools := inputs.get("tools"): if tools := inputs.get("tools"):


@ -1,7 +1,7 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -23,10 +23,10 @@ if TYPE_CHECKING:
) )
from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
Span = _Span Span = Union[_Span, Any]
SpanExporter = _SpanExporter SpanExporter = Union[_SpanExporter, Any]
UserAPIKeyAuth = _UserAPIKeyAuth UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any]
ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any]
else: else:
Span = Any Span = Any
SpanExporter = Any SpanExporter = Any
@ -46,7 +46,6 @@ LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@dataclass @dataclass
class OpenTelemetryConfig: class OpenTelemetryConfig:
exporter: Union[str, SpanExporter] = "console" exporter: Union[str, SpanExporter] = "console"
endpoint: Optional[str] = None endpoint: Optional[str] = None
headers: Optional[str] = None headers: Optional[str] = None
@ -154,7 +153,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -215,7 +213,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None, end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -353,9 +350,9 @@ class OpenTelemetry(CustomLogger):
""" """
from opentelemetry import trace from opentelemetry import trace
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( standard_callback_dynamic_params: Optional[
kwargs.get("standard_callback_dynamic_params") StandardCallbackDynamicParams
) ] = kwargs.get("standard_callback_dynamic_params")
if not standard_callback_dynamic_params: if not standard_callback_dynamic_params:
return return
@ -722,7 +719,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value) span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj): def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
kwargs.get("optional_params", {}) kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {} litellm_params = kwargs.get("litellm_params", {}) or {}
custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown") custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
@ -843,12 +839,14 @@ class OpenTelemetry(CustomLogger):
headers=dynamic_headers or self.OTEL_HEADERS headers=dynamic_headers or self.OTEL_HEADERS
) )
if isinstance(self.OTEL_EXPORTER, SpanExporter): if hasattr(
self.OTEL_EXPORTER, "export"
): # Check if it has the export method that SpanExporter requires
verbose_logger.debug( verbose_logger.debug(
"OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s", "OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s",
self.OTEL_EXPORTER, self.OTEL_EXPORTER,
) )
return SimpleSpanProcessor(self.OTEL_EXPORTER) return SimpleSpanProcessor(cast(SpanExporter, self.OTEL_EXPORTER))
if self.OTEL_EXPORTER == "console": if self.OTEL_EXPORTER == "console":
verbose_logger.debug( verbose_logger.debug(
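(Editorial note, not part of the diff.) The hunk above replaces `isinstance(self.OTEL_EXPORTER, SpanExporter)` with a duck-typing check on an `export` method, so exporter objects that do not subclass the particular `SpanExporter` class loaded at runtime are still accepted. A small sketch of that idea with hypothetical names; no OpenTelemetry import is needed:

    from typing import Any


    class ConsoleishExporter:
        """Stand-in exporter that quacks like a SpanExporter but subclasses nothing."""

        def export(self, spans: list) -> None:
            for span in spans:
                print("exporting:", span)


    def build_processor(exporter: Any):
        # Check for the capability we actually need instead of the concrete base class;
        # this avoids false negatives when two copies/versions of the base class exist.
        if hasattr(exporter, "export"):
            return ("simple_processor", exporter)  # stands in for SimpleSpanProcessor(exporter)
        raise TypeError(f"{exporter!r} does not look like a span exporter")


    processor = build_processor(ConsoleishExporter())
    print(processor[0])  # simple_processor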
@ -907,7 +905,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -961,7 +958,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode

View file

@ -185,7 +185,6 @@ class OpikLogger(CustomBatchLogger):
def _create_opik_payload( # noqa: PLR0915 def _create_opik_payload( # noqa: PLR0915
self, kwargs, response_obj, start_time, end_time self, kwargs, response_obj, start_time, end_time
) -> List[Dict]: ) -> List[Dict]:
# Get metadata # Get metadata
_litellm_params = kwargs.get("litellm_params", {}) or {} _litellm_params = kwargs.get("litellm_params", {}) or {}
litellm_params_metadata = _litellm_params.get("metadata", {}) or {} litellm_params_metadata = _litellm_params.get("metadata", {}) or {}

View file

@ -988,9 +988,9 @@ class PrometheusLogger(CustomLogger):
): ):
try: try:
verbose_logger.debug("setting remaining tokens requests metric") verbose_logger.debug("setting remaining tokens requests metric")
standard_logging_payload: Optional[StandardLoggingPayload] = ( standard_logging_payload: Optional[
request_kwargs.get("standard_logging_object") StandardLoggingPayload
) ] = request_kwargs.get("standard_logging_object")
if standard_logging_payload is None: if standard_logging_payload is None:
return return

View file

@ -14,7 +14,6 @@ class PromptManagementClient(TypedDict):
class PromptManagementBase(ABC): class PromptManagementBase(ABC):
@property @property
@abstractmethod @abstractmethod
def integration_name(self) -> str: def integration_name(self) -> str:
@ -83,11 +82,7 @@ class PromptManagementBase(ABC):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams, dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[ ) -> Tuple[str, List[AllMessageValues], dict,]:
str,
List[AllMessageValues],
dict,
]:
if not self.should_run_prompt_management( if not self.should_run_prompt_management(
prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
): ):

View file

@ -38,7 +38,7 @@ class S3Logger:
if litellm.s3_callback_params is not None: if litellm.s3_callback_params is not None:
# read in .env variables - example os.environ/AWS_BUCKET_NAME # read in .env variables - example os.environ/AWS_BUCKET_NAME
for key, value in litellm.s3_callback_params.items(): for key, value in litellm.s3_callback_params.items():
if type(value) is str and value.startswith("os.environ/"): if isinstance(value, str) and value.startswith("os.environ/"):
litellm.s3_callback_params[key] = litellm.get_secret(value) litellm.s3_callback_params[key] = litellm.get_secret(value)
# now set s3 params from litellm.s3_logger_params # now set s3 params from litellm.s3_logger_params
s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name") s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")
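(Editorial note, not part of the diff.) The `isinstance(value, str)` fix above tightens the check that resolves `os.environ/...` placeholders in `litellm.s3_callback_params` (per the `# read in .env variables` comment). A minimal sketch of that resolution step; `resolve_secret` is a hypothetical stand-in for `litellm.get_secret`, not the litellm implementation:

    import os


    def resolve_secret(value: str) -> str:
        # Resolve "os.environ/NAME" to os.environ["NAME"]; hypothetical helper.
        env_key = value.split("os.environ/", 1)[1]
        return os.environ.get(env_key, "")


    s3_callback_params = {
        "s3_bucket_name": "os.environ/AWS_BUCKET_NAME",  # placeholder resolved from the environment
        "s3_region_name": "us-east-1",                   # literal value, left untouched
        "s3_retries": 3,                                  # non-string value, skipped by the isinstance check
    }

    os.environ.setdefault("AWS_BUCKET_NAME", "my-logs-bucket")

    for key, value in s3_callback_params.items():
        if isinstance(value, str) and value.startswith("os.environ/"):
            s3_callback_params[key] = resolve_secret(value)

    print(s3_callback_params["s3_bucket_name"])  # my-logs-bucket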

View file

@ -21,11 +21,11 @@ try:
# contains a (known) object attribute # contains a (known) object attribute
object: Literal["chat.completion", "edit", "text_completion"] object: Literal["chat.completion", "edit", "text_completion"]
def __getitem__(self, key: K) -> V: ... # noqa def __getitem__(self, key: K) -> V:
... # noqa
def get( # noqa def get(self, key: K, default: Optional[V] = None) -> Optional[V]: # noqa
self, key: K, default: Optional[V] = None ... # pragma: no cover
) -> Optional[V]: ... # pragma: no cover
class OpenAIRequestResponseResolver: class OpenAIRequestResponseResolver:
def __call__( def __call__(

View file

@ -10,7 +10,7 @@ from litellm.types.llms.openai import AllMessageValues
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
Span = _Span Span = Union[_Span, Any]
else: else:
Span = Any Span = Any

View file

@ -11,7 +11,9 @@ except (ImportError, AttributeError):
# Old way to access resources, which setuptools deprecated some time ago # Old way to access resources, which setuptools deprecated some time ago
import pkg_resources # type: ignore import pkg_resources # type: ignore
filename = pkg_resources.resource_filename(__name__, "litellm_core_utils/tokenizers") filename = pkg_resources.resource_filename(
__name__, "litellm_core_utils/tokenizers"
)
os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv( os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
"CUSTOM_TIKTOKEN_CACHE_DIR", filename "CUSTOM_TIKTOKEN_CACHE_DIR", filename

View file

@ -79,6 +79,22 @@ def get_supported_openai_params( # noqa: PLR0915
elif custom_llm_provider == "maritalk": elif custom_llm_provider == "maritalk":
return litellm.MaritalkConfig().get_supported_openai_params(model=model) return litellm.MaritalkConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "openai": elif custom_llm_provider == "openai":
if request_type == "transcription":
transcription_provider_config = (
litellm.ProviderConfigManager.get_provider_audio_transcription_config(
model=model, provider=LlmProviders.OPENAI
)
)
if isinstance(
transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
):
return transcription_provider_config.get_supported_openai_params(
model=model
)
else:
raise ValueError(
f"Unsupported provider config: {transcription_provider_config} for model: {model}"
)
return litellm.OpenAIConfig().get_supported_openai_params(model=model) return litellm.OpenAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "azure": elif custom_llm_provider == "azure":
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
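(Editorial note, not part of the diff.) The new `request_type == "transcription"` branch above routes OpenAI transcription models through a provider-specific audio-transcription config instead of the generic `OpenAIConfig`. A hedged usage sketch, assuming `litellm.get_supported_openai_params` keeps the `model` / `custom_llm_provider` / `request_type` arguments shown in this diff; the model names are illustrative, and per the diff a model whose transcription config is not the GPT audio config raises `ValueError`:

    import litellm

    chat_params = litellm.get_supported_openai_params(
        model="gpt-4o-mini",
        custom_llm_provider="openai",
    )
    print("chat:", chat_params)

    transcription_params = litellm.get_supported_openai_params(
        model="gpt-4o-transcribe",          # illustrative transcription model name
        custom_llm_provider="openai",
        request_type="transcription",       # new branch: resolved via the audio-transcription config
    )
    print("transcription:", transcription_params)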

View file

@ -67,6 +67,7 @@ from litellm.types.utils import (
StandardCallbackDynamicParams, StandardCallbackDynamicParams,
StandardLoggingAdditionalHeaders, StandardLoggingAdditionalHeaders,
StandardLoggingHiddenParams, StandardLoggingHiddenParams,
StandardLoggingMCPToolCall,
StandardLoggingMetadata, StandardLoggingMetadata,
StandardLoggingModelCostFailureDebugInformation, StandardLoggingModelCostFailureDebugInformation,
StandardLoggingModelInformation, StandardLoggingModelInformation,
@ -239,9 +240,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.litellm_trace_id = litellm_trace_id self.litellm_trace_id = litellm_trace_id
self.function_id = function_id self.function_id = function_id
self.streaming_chunks: List[Any] = [] # for generating complete stream response self.streaming_chunks: List[Any] = [] # for generating complete stream response
self.sync_streaming_chunks: List[Any] = ( self.sync_streaming_chunks: List[
[] Any
) # for generating complete stream response ] = [] # for generating complete stream response
self.log_raw_request_response = log_raw_request_response self.log_raw_request_response = log_raw_request_response
# Initialize dynamic callbacks # Initialize dynamic callbacks
@ -452,11 +453,13 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_id: str, prompt_id: str,
prompt_variables: Optional[dict], prompt_variables: Optional[dict],
) -> Tuple[str, List[AllMessageValues], dict]: ) -> Tuple[str, List[AllMessageValues], dict]:
custom_logger = self.get_custom_logger_for_prompt_management(model) custom_logger = self.get_custom_logger_for_prompt_management(model)
if custom_logger: if custom_logger:
model, messages, non_default_params = ( (
custom_logger.get_chat_completion_prompt( model,
messages,
non_default_params,
) = custom_logger.get_chat_completion_prompt(
model=model, model=model,
messages=messages, messages=messages,
non_default_params=non_default_params, non_default_params=non_default_params,
@ -464,7 +467,6 @@ class Logging(LiteLLMLoggingBaseClass):
prompt_variables=prompt_variables, prompt_variables=prompt_variables,
dynamic_callback_params=self.standard_callback_dynamic_params, dynamic_callback_params=self.standard_callback_dynamic_params,
) )
)
self.messages = messages self.messages = messages
return model, messages, non_default_params return model, messages, non_default_params
@ -541,12 +543,11 @@ class Logging(LiteLLMLoggingBaseClass):
model model
): # if model name was changes pre-call, overwrite the initial model call name with the new one ): # if model name was changes pre-call, overwrite the initial model call name with the new one
self.model_call_details["model"] = model self.model_call_details["model"] = model
self.model_call_details["litellm_params"]["api_base"] = ( self.model_call_details["litellm_params"][
self._get_masked_api_base(additional_args.get("api_base", "")) "api_base"
) ] = self._get_masked_api_base(additional_args.get("api_base", ""))
def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915 def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
# Log the exact input to the LLM API # Log the exact input to the LLM API
litellm.error_logs["PRE_CALL"] = locals() litellm.error_logs["PRE_CALL"] = locals()
try: try:
@ -568,19 +569,16 @@ class Logging(LiteLLMLoggingBaseClass):
self.log_raw_request_response is True self.log_raw_request_response is True
or log_raw_request_response is True or log_raw_request_response is True
): ):
_litellm_params = self.model_call_details.get("litellm_params", {}) _litellm_params = self.model_call_details.get("litellm_params", {})
_metadata = _litellm_params.get("metadata", {}) or {} _metadata = _litellm_params.get("metadata", {}) or {}
try: try:
# [Non-blocking Extra Debug Information in metadata] # [Non-blocking Extra Debug Information in metadata]
if turn_off_message_logging is True: if turn_off_message_logging is True:
_metadata[
_metadata["raw_request"] = ( "raw_request"
"redacted by litellm. \ ] = "redacted by litellm. \
'litellm.turn_off_message_logging=True'" 'litellm.turn_off_message_logging=True'"
)
else: else:
curl_command = self._get_request_curl_command( curl_command = self._get_request_curl_command(
api_base=additional_args.get("api_base", ""), api_base=additional_args.get("api_base", ""),
headers=additional_args.get("headers", {}), headers=additional_args.get("headers", {}),
@ -590,8 +588,9 @@ class Logging(LiteLLMLoggingBaseClass):
_metadata["raw_request"] = str(curl_command) _metadata["raw_request"] = str(curl_command)
# split up, so it's easier to parse in the UI # split up, so it's easier to parse in the UI
self.model_call_details["raw_request_typed_dict"] = ( self.model_call_details[
RawRequestTypedDict( "raw_request_typed_dict"
] = RawRequestTypedDict(
raw_request_api_base=str( raw_request_api_base=str(
additional_args.get("api_base") or "" additional_args.get("api_base") or ""
), ),
@ -604,20 +603,19 @@ class Logging(LiteLLMLoggingBaseClass):
), ),
error=None, error=None,
) )
)
except Exception as e: except Exception as e:
self.model_call_details["raw_request_typed_dict"] = ( self.model_call_details[
RawRequestTypedDict( "raw_request_typed_dict"
] = RawRequestTypedDict(
error=str(e), error=str(e),
) )
)
traceback.print_exc() traceback.print_exc()
_metadata["raw_request"] = ( _metadata[
"Unable to Log \ "raw_request"
] = "Unable to Log \
raw request: {}".format( raw request: {}".format(
str(e) str(e)
) )
)
if self.logger_fn and callable(self.logger_fn): if self.logger_fn and callable(self.logger_fn):
try: try:
self.logger_fn( self.logger_fn(
@ -941,9 +939,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}" f"response_cost_failure_debug_information: {debug_info}"
) )
self.model_call_details["response_cost_failure_debug_information"] = ( self.model_call_details[
debug_info "response_cost_failure_debug_information"
) ] = debug_info
return None return None
try: try:
@ -968,9 +966,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}" f"response_cost_failure_debug_information: {debug_info}"
) )
self.model_call_details["response_cost_failure_debug_information"] = ( self.model_call_details[
debug_info "response_cost_failure_debug_information"
) ] = debug_info
return None return None
@ -995,7 +993,6 @@ class Logging(LiteLLMLoggingBaseClass):
def should_run_callback( def should_run_callback(
self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
) -> bool: ) -> bool:
if litellm.global_disable_no_log_param: if litellm.global_disable_no_log_param:
return True return True
@ -1027,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
if self.completion_start_time is None: if self.completion_start_time is None:
self.completion_start_time = end_time self.completion_start_time = end_time
self.model_call_details["completion_start_time"] = ( self.model_call_details[
self.completion_start_time "completion_start_time"
) ] = self.completion_start_time
self.model_call_details["log_event_type"] = "successful_api_call" self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit self.model_call_details["cache_hit"] = cache_hit
@ -1083,13 +1080,14 @@ class Logging(LiteLLMLoggingBaseClass):
"response_cost" "response_cost"
] ]
else: else:
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator(result=result) "response_cost"
) ] = self._response_cost_calculator(result=result)
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=result, init_response_obj=result,
start_time=start_time, start_time=start_time,
@ -1098,11 +1096,11 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
) elif isinstance(result, dict) or isinstance(result, list):
elif isinstance(result, dict): # pass-through endpoints
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=result, init_response_obj=result,
start_time=start_time, start_time=start_time,
@ -1111,11 +1109,10 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
elif standard_logging_object is not None: elif standard_logging_object is not None:
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
standard_logging_object "standard_logging_object"
) ] = standard_logging_object
else: # streaming chunks + image gen. else: # streaming chunks + image gen.
self.model_call_details["response_cost"] = None self.model_call_details["response_cost"] = None
@ -1154,7 +1151,6 @@ class Logging(LiteLLMLoggingBaseClass):
standard_logging_object=kwargs.get("standard_logging_object", None), standard_logging_object=kwargs.get("standard_logging_object", None),
) )
try: try:
## BUILD COMPLETE STREAMED RESPONSE ## BUILD COMPLETE STREAMED RESPONSE
complete_streaming_response: Optional[ complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse] Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
@ -1172,15 +1168,16 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug( verbose_logger.debug(
"Logging Details LiteLLM-Success Call streaming complete" "Logging Details LiteLLM-Success Call streaming complete"
) )
self.model_call_details["complete_streaming_response"] = ( self.model_call_details[
complete_streaming_response "complete_streaming_response"
) ] = complete_streaming_response
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator(result=complete_streaming_response) "response_cost"
) ] = self._response_cost_calculator(result=complete_streaming_response)
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=complete_streaming_response, init_response_obj=complete_streaming_response,
start_time=start_time, start_time=start_time,
@ -1189,7 +1186,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
callbacks = self.get_combined_callback_list( callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks, dynamic_success_callbacks=self.dynamic_success_callbacks,
global_callbacks=litellm.success_callback, global_callbacks=litellm.success_callback,
@ -1207,7 +1203,6 @@ class Logging(LiteLLMLoggingBaseClass):
## LOGGING HOOK ## ## LOGGING HOOK ##
for callback in callbacks: for callback in callbacks:
if isinstance(callback, CustomLogger): if isinstance(callback, CustomLogger):
self.model_call_details, result = callback.logging_hook( self.model_call_details, result = callback.logging_hook(
kwargs=self.model_call_details, kwargs=self.model_call_details,
result=result, result=result,
@ -1538,11 +1533,11 @@ class Logging(LiteLLMLoggingBaseClass):
) )
else: else:
if self.stream and complete_streaming_response: if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = ( self.model_call_details[
self.model_call_details.get( "complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {} "complete_streaming_response", {}
) )
)
result = self.model_call_details["complete_response"] result = self.model_call_details["complete_response"]
openMeterLogger.log_success_event( openMeterLogger.log_success_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
@ -1581,11 +1576,11 @@ class Logging(LiteLLMLoggingBaseClass):
) )
else: else:
if self.stream and complete_streaming_response: if self.stream and complete_streaming_response:
self.model_call_details["complete_response"] = ( self.model_call_details[
self.model_call_details.get( "complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {} "complete_streaming_response", {}
) )
)
result = self.model_call_details["complete_response"] result = self.model_call_details["complete_response"]
callback.log_success_event( callback.log_success_event(
@ -1659,7 +1654,6 @@ class Logging(LiteLLMLoggingBaseClass):
if self.call_type == CallTypes.aretrieve_batch.value and isinstance( if self.call_type == CallTypes.aretrieve_batch.value and isinstance(
result, LiteLLMBatch result, LiteLLMBatch
): ):
response_cost, batch_usage, batch_models = await _handle_completed_batch( response_cost, batch_usage, batch_models = await _handle_completed_batch(
batch=result, custom_llm_provider=self.custom_llm_provider batch=result, custom_llm_provider=self.custom_llm_provider
) )
@ -1692,9 +1686,9 @@ class Logging(LiteLLMLoggingBaseClass):
if complete_streaming_response is not None: if complete_streaming_response is not None:
print_verbose("Async success callbacks: Got a complete streaming response") print_verbose("Async success callbacks: Got a complete streaming response")
self.model_call_details["async_complete_streaming_response"] = ( self.model_call_details[
complete_streaming_response "async_complete_streaming_response"
) ] = complete_streaming_response
try: try:
if self.model_call_details.get("cache_hit", False) is True: if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0 self.model_call_details["response_cost"] = 0.0
@ -1704,11 +1698,11 @@ class Logging(LiteLLMLoggingBaseClass):
model_call_details=self.model_call_details model_call_details=self.model_call_details
) )
# base_model defaults to None if not set on model_info # base_model defaults to None if not set on model_info
self.model_call_details["response_cost"] = ( self.model_call_details[
self._response_cost_calculator( "response_cost"
] = self._response_cost_calculator(
result=complete_streaming_response result=complete_streaming_response
) )
)
verbose_logger.debug( verbose_logger.debug(
f"Model={self.model}; cost={self.model_call_details['response_cost']}" f"Model={self.model}; cost={self.model_call_details['response_cost']}"
@ -1720,8 +1714,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["response_cost"] = None self.model_call_details["response_cost"] = None
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj=complete_streaming_response, init_response_obj=complete_streaming_response,
start_time=start_time, start_time=start_time,
@ -1730,7 +1725,6 @@ class Logging(LiteLLMLoggingBaseClass):
status="success", status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
callbacks = self.get_combined_callback_list( callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks, dynamic_success_callbacks=self.dynamic_async_success_callbacks,
global_callbacks=litellm._async_success_callback, global_callbacks=litellm._async_success_callback,
@ -1935,8 +1929,9 @@ class Logging(LiteLLMLoggingBaseClass):
## STANDARDIZED LOGGING PAYLOAD ## STANDARDIZED LOGGING PAYLOAD
self.model_call_details["standard_logging_object"] = ( self.model_call_details[
get_standard_logging_object_payload( "standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details, kwargs=self.model_call_details,
init_response_obj={}, init_response_obj={},
start_time=start_time, start_time=start_time,
@ -1947,7 +1942,6 @@ class Logging(LiteLLMLoggingBaseClass):
original_exception=exception, original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params, standard_built_in_tools_params=self.standard_built_in_tools_params,
) )
)
return start_time, end_time return start_time, end_time
async def special_failure_handlers(self, exception: Exception): async def special_failure_handlers(self, exception: Exception):
@ -2084,7 +2078,6 @@ class Logging(LiteLLMLoggingBaseClass):
) )
is not True is not True
): # custom logger class ): # custom logger class
callback.log_failure_event( callback.log_failure_event(
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -2713,9 +2706,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
endpoint=arize_config.endpoint, endpoint=arize_config.endpoint,
) )
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
f"space_key={arize_config.space_key},api_key={arize_config.api_key}" "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
isinstance(callback, ArizeLogger) isinstance(callback, ArizeLogger)
@ -2739,9 +2732,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
# auth can be disabled on local deployments of arize phoenix # auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None: if arize_phoenix_config.otlp_auth_headers is not None:
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
arize_phoenix_config.otlp_auth_headers "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = arize_phoenix_config.otlp_auth_headers
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
@ -2832,9 +2825,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
exporter="otlp_http", exporter="otlp_http",
endpoint="https://langtrace.ai/api/trace", endpoint="https://langtrace.ai/api/trace",
) )
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( os.environ[
f"api_key={os.getenv('LANGTRACE_API_KEY')}" "OTEL_EXPORTER_OTLP_TRACES_HEADERS"
) ] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
for callback in _in_memory_loggers: for callback in _in_memory_loggers:
if ( if (
isinstance(callback, OpenTelemetry) isinstance(callback, OpenTelemetry)
@ -3114,6 +3107,7 @@ class StandardLoggingPayloadSetup:
litellm_params: Optional[dict] = None, litellm_params: Optional[dict] = None,
prompt_integration: Optional[str] = None, prompt_integration: Optional[str] = None,
applied_guardrails: Optional[List[str]] = None, applied_guardrails: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
) -> StandardLoggingMetadata: ) -> StandardLoggingMetadata:
""" """
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata. Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@ -3160,6 +3154,7 @@ class StandardLoggingPayloadSetup:
user_api_key_end_user_id=None, user_api_key_end_user_id=None,
prompt_management_metadata=prompt_management_metadata, prompt_management_metadata=prompt_management_metadata,
applied_guardrails=applied_guardrails, applied_guardrails=applied_guardrails,
mcp_tool_call_metadata=mcp_tool_call_metadata,
) )
if isinstance(metadata, dict): if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys # Filter the metadata dictionary to include only the specified keys
@ -3223,7 +3218,6 @@ class StandardLoggingPayloadSetup:
custom_llm_provider: Optional[str], custom_llm_provider: Optional[str],
init_response_obj: Union[Any, BaseModel, dict], init_response_obj: Union[Any, BaseModel, dict],
) -> StandardLoggingModelInformation: ) -> StandardLoggingModelInformation:
model_cost_name = _select_model_name_for_cost_calc( model_cost_name = _select_model_name_for_cost_calc(
model=None, model=None,
completion_response=init_response_obj, # type: ignore completion_response=init_response_obj, # type: ignore
@ -3286,7 +3280,6 @@ class StandardLoggingPayloadSetup:
def get_additional_headers( def get_additional_headers(
additiona_headers: Optional[dict], additiona_headers: Optional[dict],
) -> Optional[StandardLoggingAdditionalHeaders]: ) -> Optional[StandardLoggingAdditionalHeaders]:
if additiona_headers is None: if additiona_headers is None:
return None return None
@ -3322,11 +3315,11 @@ class StandardLoggingPayloadSetup:
for key in StandardLoggingHiddenParams.__annotations__.keys(): for key in StandardLoggingHiddenParams.__annotations__.keys():
if key in hidden_params: if key in hidden_params:
if key == "additional_headers": if key == "additional_headers":
clean_hidden_params["additional_headers"] = ( clean_hidden_params[
StandardLoggingPayloadSetup.get_additional_headers( "additional_headers"
] = StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key] hidden_params[key]
) )
)
else: else:
clean_hidden_params[key] = hidden_params[key] # type: ignore clean_hidden_params[key] = hidden_params[key] # type: ignore
return clean_hidden_params return clean_hidden_params
@ -3463,13 +3456,15 @@ def get_standard_logging_object_payload(
) )
# cleanup timestamps # cleanup timestamps
start_time_float, end_time_float, completion_start_time_float = ( (
StandardLoggingPayloadSetup.cleanup_timestamps( start_time_float,
end_time_float,
completion_start_time_float,
) = StandardLoggingPayloadSetup.cleanup_timestamps(
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
completion_start_time=completion_start_time, completion_start_time=completion_start_time,
) )
)
response_time = StandardLoggingPayloadSetup.get_response_time( response_time = StandardLoggingPayloadSetup.get_response_time(
start_time_float=start_time_float, start_time_float=start_time_float,
end_time_float=end_time_float, end_time_float=end_time_float,
@ -3486,6 +3481,7 @@ def get_standard_logging_object_payload(
litellm_params=litellm_params, litellm_params=litellm_params,
prompt_integration=kwargs.get("prompt_integration", None), prompt_integration=kwargs.get("prompt_integration", None),
applied_guardrails=kwargs.get("applied_guardrails", None), applied_guardrails=kwargs.get("applied_guardrails", None),
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
) )
_request_body = proxy_server_request.get("body", {}) _request_body = proxy_server_request.get("body", {})
@ -3495,7 +3491,6 @@ def get_standard_logging_object_payload(
saved_cache_cost: float = 0.0 saved_cache_cost: float = 0.0
if cache_hit is True: if cache_hit is True:
id = f"{id}_cache_hit{time.time()}" # do not duplicate the request id id = f"{id}_cache_hit{time.time()}" # do not duplicate the request id
saved_cache_cost = ( saved_cache_cost = (
logging_obj._response_cost_calculator( logging_obj._response_cost_calculator(
@ -3626,6 +3621,7 @@ def get_standard_logging_metadata(
user_api_key_end_user_id=None, user_api_key_end_user_id=None,
prompt_management_metadata=None, prompt_management_metadata=None,
applied_guardrails=None, applied_guardrails=None,
mcp_tool_call_metadata=None,
) )
if isinstance(metadata, dict): if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys # Filter the metadata dictionary to include only the specified keys
@ -3658,9 +3654,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
): ):
for k, v in metadata["user_api_key_metadata"].items(): for k, v in metadata["user_api_key_metadata"].items():
if k == "logging": # prevent logging user logging keys if k == "logging": # prevent logging user logging keys
cleaned_user_api_key_metadata[k] = ( cleaned_user_api_key_metadata[
"scrubbed_by_litellm_for_sensitive_keys" k
) ] = "scrubbed_by_litellm_for_sensitive_keys"
else: else:
cleaned_user_api_key_metadata[k] = v cleaned_user_api_key_metadata[k] = v

View file

@ -1,7 +1,7 @@
# What is this? # What is this?
## Helper utilities for cost_per_token() ## Helper utilities for cost_per_token()
from typing import Optional, Tuple from typing import Optional, Tuple, cast
import litellm import litellm
from litellm import verbose_logger from litellm import verbose_logger
@ -121,6 +121,31 @@ def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> floa
return model_info["output_cost_per_token"] return model_info["output_cost_per_token"]
def calculate_cost_component(
model_info: ModelInfo, cost_key: str, usage_value: Optional[float]
) -> float:
"""
Generic cost calculator for any usage component
Args:
model_info: Dictionary containing cost information
cost_key: The key for the cost multiplier in model_info (e.g., 'input_cost_per_audio_token')
usage_value: The actual usage value (e.g., number of tokens, characters, seconds)
Returns:
float: The calculated cost
"""
cost_per_unit = model_info.get(cost_key)
if (
cost_per_unit is not None
and isinstance(cost_per_unit, float)
and usage_value is not None
and usage_value > 0
):
return float(usage_value) * cost_per_unit
return 0.0
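(Editorial note, not part of the diff.) The new `calculate_cost_component` helper above turns each optional per-unit price in `model_info` into a cost and returns 0.0 whenever the price or the usage value is missing. A small usage sketch with a standalone copy of the same logic and made-up prices, not taken from any real model_info entry:

    from typing import Optional


    def calculate_cost_component(
        model_info: dict, cost_key: str, usage_value: Optional[float]
    ) -> float:
        # Same logic as the helper in the diff, copied here so the example runs standalone.
        cost_per_unit = model_info.get(cost_key)
        if (
            cost_per_unit is not None
            and isinstance(cost_per_unit, float)
            and usage_value is not None
            and usage_value > 0
        ):
            return float(usage_value) * cost_per_unit
        return 0.0


    fake_model_info = {  # hypothetical prices, chosen only for the arithmetic
        "input_cost_per_audio_token": 0.000002,
        "cache_read_input_token_cost": 0.0000005,
    }

    print(calculate_cost_component(fake_model_info, "input_cost_per_audio_token", 1000))  # ~0.002
    print(calculate_cost_component(fake_model_info, "cache_read_input_token_cost", 0))    # 0.0 (no usage)
    print(calculate_cost_component(fake_model_info, "input_cost_per_character", 500))     # 0.0 (price missing)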
def generic_cost_per_token( def generic_cost_per_token(
model: str, usage: Usage, custom_llm_provider: str model: str, usage: Usage, custom_llm_provider: str
) -> Tuple[float, float]: ) -> Tuple[float, float]:
@ -136,6 +161,7 @@ def generic_cost_per_token(
Returns: Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
""" """
## GET MODEL INFO ## GET MODEL INFO
model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider) model_info = get_model_info(model=model, custom_llm_provider=custom_llm_provider)
@ -143,38 +169,124 @@ def generic_cost_per_token(
### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing) ### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
prompt_cost = 0.0 prompt_cost = 0.0
### PROCESSING COST ### PROCESSING COST
non_cache_hit_tokens = usage.prompt_tokens text_tokens = usage.prompt_tokens
cache_hit_tokens = 0 cache_hit_tokens = 0
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens: audio_tokens = 0
cache_hit_tokens = usage.prompt_tokens_details.cached_tokens character_count = 0
non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens image_count = 0
video_length_seconds = 0
if usage.prompt_tokens_details:
cache_hit_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
)
or 0
)
text_tokens = (
cast(
Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
)
or 0 # default to prompt tokens, if this field is not set
)
audio_tokens = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
or 0
)
character_count = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "character_count", 0),
)
or 0
)
image_count = (
cast(Optional[int], getattr(usage.prompt_tokens_details, "image_count", 0))
or 0
)
video_length_seconds = (
cast(
Optional[int],
getattr(usage.prompt_tokens_details, "video_length_seconds", 0),
)
or 0
)
## EDGE CASE - text tokens not set inside PromptTokensDetails
if text_tokens == 0:
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage) prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost prompt_cost = float(text_tokens) * prompt_base_cost
_cache_read_input_token_cost = model_info.get("cache_read_input_token_cost") ### CACHE READ COST
if ( prompt_cost += calculate_cost_component(
_cache_read_input_token_cost is not None model_info, "cache_read_input_token_cost", cache_hit_tokens
and usage.prompt_tokens_details )
and usage.prompt_tokens_details.cached_tokens
): ### AUDIO COST
prompt_cost += ( prompt_cost += calculate_cost_component(
float(usage.prompt_tokens_details.cached_tokens) model_info, "input_cost_per_audio_token", audio_tokens
* _cache_read_input_token_cost
) )
### CACHE WRITING COST ### CACHE WRITING COST
_cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost") prompt_cost += calculate_cost_component(
if _cache_creation_input_token_cost is not None: model_info,
prompt_cost += ( "cache_creation_input_token_cost",
float(usage._cache_creation_input_tokens) * _cache_creation_input_token_cost usage._cache_creation_input_tokens,
)
### CHARACTER COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_character", character_count
)
### IMAGE COUNT COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_image", image_count
)
### VIDEO LENGTH COST
prompt_cost += calculate_cost_component(
model_info, "input_cost_per_video_per_second", video_length_seconds
) )
## CALCULATE OUTPUT COST ## CALCULATE OUTPUT COST
completion_base_cost = _get_completion_token_base_cost( completion_base_cost = _get_completion_token_base_cost(
model_info=model_info, usage=usage model_info=model_info, usage=usage
) )
completion_cost = usage["completion_tokens"] * completion_base_cost text_tokens = usage.completion_tokens
audio_tokens = 0
if usage.completion_tokens_details is not None:
audio_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "audio_tokens", 0),
)
or 0
)
text_tokens = (
cast(
Optional[int],
getattr(usage.completion_tokens_details, "text_tokens", None),
)
or usage.completion_tokens # default to completion tokens, if this field is not set
)
## TEXT COST
completion_cost = float(text_tokens) * completion_base_cost
_output_cost_per_audio_token: Optional[float] = model_info.get(
"output_cost_per_audio_token"
)
## AUDIO COST
if (
_output_cost_per_audio_token is not None
and audio_tokens is not None
and audio_tokens > 0
):
completion_cost += float(audio_tokens) * _output_cost_per_audio_token
return prompt_cost, completion_cost return prompt_cost, completion_cost
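(Editorial note, not part of the diff.) Taken together, the rewritten `generic_cost_per_token` above now prices each component of `prompt_tokens_details` separately (text, cached, audio, characters, images, video seconds) and adds audio pricing on the completion side, with text tokens derived from the total when the provider does not report them. A worked example following the same composition; every price and count below is hypothetical:

    # Hypothetical per-token prices; the composition mirrors the diff, not real model pricing.
    input_cost_per_token = 0.000001          # text prompt tokens
    cache_read_input_token_cost = 0.0000001  # cached prompt tokens
    input_cost_per_audio_token = 0.000004    # audio prompt tokens
    output_cost_per_token = 0.000002         # text completion tokens
    output_cost_per_audio_token = 0.000008   # audio completion tokens

    prompt_tokens = 1200        # total prompt tokens reported by the provider
    cached_tokens = 200
    prompt_audio_tokens = 100
    # Edge case from the diff: if text_tokens is not reported, derive it from the total.
    prompt_text_tokens = prompt_tokens - cached_tokens - prompt_audio_tokens  # 900

    completion_text_tokens = 300
    completion_audio_tokens = 50

    prompt_cost = (
        prompt_text_tokens * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
        + prompt_audio_tokens * input_cost_per_audio_token
    )
    completion_cost = (
        completion_text_tokens * output_cost_per_token
        + completion_audio_tokens * output_cost_per_audio_token
    )

    print(round(prompt_cost, 8))      # 0.00132 = 900*1e-6 + 200*1e-7 + 100*4e-6
    print(round(completion_cost, 8))  # 0.001   = 300*2e-6 + 50*8e-6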

Some files were not shown because too many files have changed in this diff.