Merge branch 'main' into litellm_8864-feature-vertex-anyOf-support

@@ -3,6 +3,18 @@ orbs:
   codecov: codecov/codecov@4.0.1
   node: circleci/node@5.1.0 # Add this line to declare the node orb
 
+commands:
+  setup_google_dns:
+    steps:
+      - run:
+          name: "Configure Google DNS"
+          command: |
+            # Backup original resolv.conf
+            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
+            # Add both local and Google DNS servers
+            echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
+            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
+            echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
+
 jobs:
   local_testing:
@@ -15,7 +27,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -66,7 +78,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -134,7 +146,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -185,7 +197,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -234,7 +246,13 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
+      - run:
+          name: DNS lookup for Redis host
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y dnsutils
+            dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |
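The `setup_google_dns` command and the `dig` step above exist to rule out DNS flakiness against the Redis Cloud host used in CI. A minimal Python sketch of the same check is below; the hostname is copied from the diff, while the helper name and error handling are illustrative assumptions, not part of the PR.

```python
import socket

# Mirror of the `dig ... +short` CI step: resolve the Redis Cloud hostname and
# fail fast with a clear message if DNS is broken, instead of timing out in a test.
def check_dns(hostname: str = "redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com") -> str:
    try:
        ip_address = socket.gethostbyname(hostname)
    except socket.gaierror as err:
        raise RuntimeError(f"DNS lookup failed for {hostname}: {err}") from err
    print(f"{hostname} resolved to {ip_address}")
    return ip_address

if __name__ == "__main__":
    check_dns()
```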
@@ -285,7 +303,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -334,6 +352,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -388,6 +407,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -429,6 +449,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -479,7 +500,13 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install postgresql postgresql-contrib
+            echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -530,10 +557,11 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
+          pip install "pytest-postgresql==7.0.1"
       - save_cache:
           paths:
             - ./venv
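The new `pytest-postgresql==7.0.1` pin suggests the proxy unit tests can run against a throwaway Postgres instance. A hedged sketch of how such a test might use the plugin's built-in `postgresql` connection fixture follows; the table and assertions are invented for illustration, and the fixture behaviour is an assumption about the pinned plugin version rather than something shown in this diff.

```python
# Hypothetical test sketch using pytest-postgresql's `postgresql` fixture,
# assumed to yield an open connection to a temporary database.
def test_spend_roundtrip(postgresql):
    with postgresql.cursor() as cur:
        cur.execute("CREATE TABLE spend (id SERIAL PRIMARY KEY, amount FLOAT)")
        cur.execute("INSERT INTO spend (amount) VALUES (1.5)")
        cur.execute("SELECT COUNT(*) FROM spend")
        assert cur.fetchone()[0] == 1
```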
@@ -569,7 +597,7 @@ jobs:
       - litellm_proxy_unit_tests_coverage
   litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
     docker:
-      - image: cimg/python:3.11
+      - image: cimg/python:3.13.1
         auth:
           username: ${DOCKERHUB_USERNAME}
           password: ${DOCKERHUB_PASSWORD}
@@ -577,6 +605,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -618,6 +647,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -625,7 +655,13 @@ jobs:
           python -m pip install -r requirements.txt
           pip install "pytest==7.3.1"
           pip install "pytest-retry==1.6.3"
+          pip install "pytest-cov==5.0.0"
           pip install "pytest-asyncio==0.21.1"
+          pip install "respx==0.21.1"
+      - run:
+          name: Show current pydantic version
+          command: |
+            python -m pip show pydantic
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -648,6 +684,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -690,6 +727,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -700,8 +738,8 @@ jobs:
           pip install "pytest-cov==5.0.0"
           pip install "pytest-asyncio==0.21.1"
           pip install "respx==0.21.1"
-          pip install "pydantic==2.7.2"
+          pip install "pydantic==2.10.2"
-          pip install "mcp==1.4.1"
+          pip install "mcp==1.5.0"
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -734,6 +772,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -776,6 +815,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -788,8 +828,8 @@ jobs:
           pip install "pytest-asyncio==0.21.1"
           pip install "respx==0.21.1"
           pip install "hypercorn==0.17.3"
-          pip install "pydantic==2.7.2"
+          pip install "pydantic==2.10.2"
-          pip install "mcp==1.4.1"
+          pip install "mcp==1.5.0"
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -822,6 +862,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -866,10 +907,12 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
            python -m pip install --upgrade pip
+            pip install numpydoc
            python -m pip install -r requirements.txt
            pip install "respx==0.21.1"
            pip install "pytest==7.3.1"
@@ -878,7 +921,6 @@ jobs:
            pip install "pytest-cov==5.0.0"
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
-            pip install numpydoc
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -912,6 +954,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -954,6 +997,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -996,6 +1040,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1042,6 +1087,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1054,8 +1100,8 @@ jobs:
            pip install click
            pip install "boto3==1.34.34"
            pip install jinja2
-            pip install tokenizers=="0.20.0"
+            pip install "tokenizers==0.20.0"
-            pip install uvloop==0.21.0
+            pip install "uvloop==0.21.0"
            pip install jsonschema
       - run:
           name: Run tests
@@ -1074,6 +1120,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1098,6 +1145,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       # Install Helm
       - run:
           name: Install Helm
@@ -1167,6 +1215,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1203,6 +1252,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Python 3.9
           command: |
@@ -1277,6 +1327,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1412,6 +1463,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1448,6 +1500,7 @@ jobs:
           pip install "boto3==1.34.34"
           pip install "aioboto3==12.3.0"
           pip install langchain
+          pip install "langchain_mcp_adapters==0.0.5"
           pip install "langfuse>=2.0.0"
           pip install "logfire==0.29.0"
           pip install numpydoc
@@ -1535,6 +1588,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1697,6 +1751,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1808,6 +1863,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1847,7 +1903,7 @@ jobs:
          command: |
            docker run -d \
              -p 4000:4000 \
-              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e DATABASE_URL=$CLEAN_STORE_MODEL_IN_DB_DATABASE_URL \
              -e STORE_MODEL_IN_DB="True" \
              -e LITELLM_MASTER_KEY="sk-1234" \
              -e LITELLM_LICENSE=$LITELLM_LICENSE \
@@ -1890,6 +1946,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       # Remove Docker CLI installation since it's already available in machine executor
       - run:
           name: Install Python 3.13
@@ -1987,6 +2044,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -2014,7 +2072,7 @@ jobs:
            pip install "openai==1.68.2"
            pip install "assemblyai==0.37.0"
            python -m pip install --upgrade pip
-            pip install "pydantic==2.7.1"
+            pip install "pydantic==2.10.2"
            pip install "pytest==7.3.1"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
@@ -2031,6 +2089,9 @@ jobs:
            pip install "PyGithub==1.59.1"
            pip install "google-cloud-aiplatform==1.59.0"
            pip install "anthropic==0.49.0"
+            pip install "langchain_mcp_adapters==0.0.5"
+            pip install "langchain_openai==0.2.1"
+            pip install "langgraph==0.3.18"
       # Run pytest and generate JUnit XML report
       - run:
           name: Build Docker image
@@ -2243,6 +2304,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build UI
           command: |
@@ -2289,7 +2351,7 @@ jobs:
            pip install aiohttp
            pip install "openai==1.68.2"
            python -m pip install --upgrade pip
-            pip install "pydantic==2.7.1"
+            pip install "pydantic==2.10.2"
            pip install "pytest==7.3.1"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
@@ -2357,6 +2419,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
@@ -2379,6 +2442,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
@@ -4,10 +4,12 @@ python-dotenv
 tiktoken
 importlib_metadata
 cohere
-redis
+redis==5.2.1
+redisvl==0.4.1
 anthropic
 orjson==3.9.15
-pydantic==2.7.1
+pydantic==2.10.2
 google-cloud-aiplatform==1.43.0
 fastapi-sso==0.10.0
 uvloop==0.21.0
+mcp==1.5.0 # for MCP server
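Several pins move together in this hunk (redis, redisvl, pydantic, mcp). Below is a small sanity-check sketch, standard library only, that compares installed versions against these pins; the expected values are copied from the hunk above and the script itself is illustrative, not part of the repository.

```python
from importlib.metadata import PackageNotFoundError, version

# Pins taken from the requirements.txt hunk above; the check itself is illustrative.
EXPECTED = {
    "redis": "5.2.1",
    "redisvl": "0.4.1",
    "pydantic": "2.10.2",
    "mcp": "1.5.0",
}

for package, expected in EXPECTED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (expected {expected})")
        continue
    status = "OK" if installed == expected else f"MISMATCH (expected {expected})"
    print(f"{package}: {installed} {status}")
```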
.github/workflows/publish-migrations.yml (new file, 206 lines)
@@ -0,0 +1,206 @@
|
||||||
|
name: Publish Prisma Migrations
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- 'schema.prisma' # Check root schema.prisma
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
publish-migrations:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:14
|
||||||
|
env:
|
||||||
|
POSTGRES_DB: temp_db
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: postgres
|
||||||
|
ports:
|
||||||
|
- 5432:5432
|
||||||
|
options: >-
|
||||||
|
--health-cmd pg_isready
|
||||||
|
--health-interval 10s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 5
|
||||||
|
|
||||||
|
# Add shadow database service
|
||||||
|
postgres_shadow:
|
||||||
|
image: postgres:14
|
||||||
|
env:
|
||||||
|
POSTGRES_DB: shadow_db
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: postgres
|
||||||
|
ports:
|
||||||
|
- 5433:5432
|
||||||
|
options: >-
|
||||||
|
--health-cmd pg_isready
|
||||||
|
--health-interval 10s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 5
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install Dependencies
|
||||||
|
run: |
|
||||||
|
pip install prisma
|
||||||
|
pip install python-dotenv
|
||||||
|
|
||||||
|
- name: Generate Initial Migration if None Exists
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
mkdir -p deploy/migrations
|
||||||
|
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
|
||||||
|
|
||||||
|
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
|
||||||
|
echo "No existing migrations found, creating baseline..."
|
||||||
|
VERSION=$(date +%Y%m%d%H%M%S)
|
||||||
|
mkdir -p deploy/migrations/${VERSION}_initial
|
||||||
|
|
||||||
|
echo "Generating initial migration..."
|
||||||
|
# Save raw output for debugging
|
||||||
|
prisma migrate diff \
|
||||||
|
--from-empty \
|
||||||
|
--to-schema-datamodel schema.prisma \
|
||||||
|
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||||
|
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
|
||||||
|
echo "Raw migration file content:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
|
||||||
|
echo "Cleaning migration file..."
|
||||||
|
# Clean the file
|
||||||
|
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
|
||||||
|
# Verify the migration file
|
||||||
|
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
|
||||||
|
echo "ERROR: Migration file is empty after cleaning"
|
||||||
|
echo "Original content was:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Final migration file content:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
|
||||||
|
# Verify it starts with SQL
|
||||||
|
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
|
||||||
|
echo "ERROR: Migration file does not start with SQL command or comment"
|
||||||
|
echo "First line is:"
|
||||||
|
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
echo "Full content is:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Compare and Generate Migration
|
||||||
|
if: success()
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
# Create temporary migration workspace
|
||||||
|
mkdir -p temp_migrations
|
||||||
|
|
||||||
|
# Copy existing migrations (will not fail if directory is empty)
|
||||||
|
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
|
||||||
|
|
||||||
|
VERSION=$(date +%Y%m%d%H%M%S)
|
||||||
|
|
||||||
|
# Generate diff against existing migrations or empty state
|
||||||
|
prisma migrate diff \
|
||||||
|
--from-migrations temp_migrations \
|
||||||
|
--to-schema-datamodel schema.prisma \
|
||||||
|
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||||
|
--script > temp_migrations/migration_${VERSION}.sql
|
||||||
|
|
||||||
|
# Check if there are actual changes
|
||||||
|
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
|
||||||
|
echo "Changes detected, creating new migration"
|
||||||
|
mkdir -p deploy/migrations/${VERSION}_schema_update
|
||||||
|
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
|
||||||
|
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
|
||||||
|
else
|
||||||
|
echo "No schema changes detected"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Verify Migration
|
||||||
|
if: success()
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
# Create test database
|
||||||
|
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
|
||||||
|
|
||||||
|
# Apply all migrations in order to verify
|
||||||
|
for migration in deploy/migrations/*/migration.sql; do
|
||||||
|
echo "Applying migration: $migration"
|
||||||
|
psql "${SHADOW_DATABASE_URL}" -f $migration
|
||||||
|
done
|
||||||
|
|
||||||
|
# Add this step before create-pull-request to debug permissions
|
||||||
|
- name: Check Token Permissions
|
||||||
|
run: |
|
||||||
|
echo "Checking token permissions..."
|
||||||
|
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||||
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
|
https://api.github.com/repos/BerriAI/litellm/collaborators
|
||||||
|
|
||||||
|
echo "\nChecking if token can create PRs..."
|
||||||
|
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||||
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
|
https://api.github.com/repos/BerriAI/litellm
|
||||||
|
|
||||||
|
# Add this debug step before git push
|
||||||
|
- name: Debug Changed Files
|
||||||
|
run: |
|
||||||
|
echo "Files staged for commit:"
|
||||||
|
git diff --name-status --staged
|
||||||
|
|
||||||
|
echo "\nAll changed files:"
|
||||||
|
git status
|
||||||
|
|
||||||
|
- name: Create Pull Request
|
||||||
|
if: success()
|
||||||
|
uses: peter-evans/create-pull-request@v5
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
commit-message: "chore: update prisma migrations"
|
||||||
|
title: "Update Prisma Migrations"
|
||||||
|
body: |
|
||||||
|
Auto-generated migration based on schema.prisma changes.
|
||||||
|
|
||||||
|
Generated files:
|
||||||
|
- deploy/migrations/${VERSION}_schema_update/migration.sql
|
||||||
|
- deploy/migrations/${VERSION}_schema_update/README.md
|
||||||
|
branch: feat/prisma-migration-${{ env.VERSION }}
|
||||||
|
base: main
|
||||||
|
delete-branch: true
|
||||||
|
|
||||||
|
- name: Generate and Save Migrations
|
||||||
|
run: |
|
||||||
|
# Only add migration files
|
||||||
|
git add deploy/migrations/
|
||||||
|
git status # Debug what's being committed
|
||||||
|
git commit -m "chore: update prisma migrations"
|
.gitignore
@@ -1,3 +1,4 @@
+.python-version
 .venv
 .env
 .newenv
@@ -37,9 +37,6 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
@@ -16,9 +16,6 @@
 <a href="https://pypi.org/project/litellm/" target="_blank">
     <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
 </a>
-<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
-    <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
-</a>
 <a href="https://www.ycombinator.com/companies/berriai">
     <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
 </a>
ci_cd/baseline_db.py (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def create_baseline():
|
||||||
|
"""Create baseline migration in deploy/migrations"""
|
||||||
|
try:
|
||||||
|
# Get paths
|
||||||
|
root_dir = Path(__file__).parent.parent
|
||||||
|
deploy_dir = root_dir / "deploy"
|
||||||
|
migrations_dir = deploy_dir / "migrations"
|
||||||
|
schema_path = root_dir / "schema.prisma"
|
||||||
|
|
||||||
|
# Create migrations directory
|
||||||
|
migrations_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Create migration_lock.toml if it doesn't exist
|
||||||
|
lock_file = migrations_dir / "migration_lock.toml"
|
||||||
|
if not lock_file.exists():
|
||||||
|
lock_file.write_text('provider = "postgresql"\n')
|
||||||
|
|
||||||
|
# Create timestamp-based migration directory
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
migration_dir = migrations_dir / f"{timestamp}_baseline"
|
||||||
|
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Generate migration SQL
|
||||||
|
result = subprocess.run(
|
||||||
|
[
|
||||||
|
"prisma",
|
||||||
|
"migrate",
|
||||||
|
"diff",
|
||||||
|
"--from-empty",
|
||||||
|
"--to-schema-datamodel",
|
||||||
|
str(schema_path),
|
||||||
|
"--script",
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write the SQL to migration.sql
|
||||||
|
migration_file = migration_dir / "migration.sql"
|
||||||
|
migration_file.write_text(result.stdout)
|
||||||
|
|
||||||
|
print(f"Created baseline migration in {migration_dir}")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error running prisma command: {e.stderr}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error creating baseline migration: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
create_baseline()
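`ci_cd/baseline_db.py` shells out to `prisma migrate diff --from-empty --script` to capture a baseline, and the companion `ci_cd/run_migration.py` below diffs the current schema against already-applied migrations. A hedged usage sketch follows, assuming both are run from the repository root with the Prisma CLI installed; the migration name is a placeholder.

```python
# Illustrative driver for the ci_cd helpers; assumes the repository root as the
# working directory and the `prisma` CLI on PATH, as the scripts themselves do.
from ci_cd.baseline_db import create_baseline
from ci_cd.run_migration import create_migration

if create_baseline():
    # "add_spend_indexes" is a placeholder name, not a migration from this PR.
    create_migration("add_spend_indexes")
```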
|
ci_cd/run_migration.py (new file, 96 lines)
@@ -0,0 +1,96 @@
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import testing.postgresql
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
|
def create_migration(migration_name: str = None):
|
||||||
|
"""
|
||||||
|
Create a new migration SQL file in deploy/migrations directory by comparing
|
||||||
|
current database state with schema
|
||||||
|
|
||||||
|
Args:
|
||||||
|
migration_name (str): Name for the migration
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get paths
|
||||||
|
root_dir = Path(__file__).parent.parent
|
||||||
|
deploy_dir = root_dir / "deploy"
|
||||||
|
migrations_dir = deploy_dir / "migrations"
|
||||||
|
schema_path = root_dir / "schema.prisma"
|
||||||
|
|
||||||
|
# Create temporary PostgreSQL database
|
||||||
|
with testing.postgresql.Postgresql() as postgresql:
|
||||||
|
db_url = postgresql.url()
|
||||||
|
|
||||||
|
# Create temporary migrations directory next to schema.prisma
|
||||||
|
temp_migrations_dir = schema_path.parent / "migrations"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Copy existing migrations to temp directory
|
||||||
|
if temp_migrations_dir.exists():
|
||||||
|
shutil.rmtree(temp_migrations_dir)
|
||||||
|
shutil.copytree(migrations_dir, temp_migrations_dir)
|
||||||
|
|
||||||
|
# Apply existing migrations to temp database
|
||||||
|
os.environ["DATABASE_URL"] = db_url
|
||||||
|
subprocess.run(
|
||||||
|
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate diff between current database and schema
|
||||||
|
result = subprocess.run(
|
||||||
|
[
|
||||||
|
"prisma",
|
||||||
|
"migrate",
|
||||||
|
"diff",
|
||||||
|
"--from-url",
|
||||||
|
db_url,
|
||||||
|
"--to-schema-datamodel",
|
||||||
|
str(schema_path),
|
||||||
|
"--script",
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.stdout.strip():
|
||||||
|
# Generate timestamp and create migration directory
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
migration_name = migration_name or "unnamed_migration"
|
||||||
|
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
|
||||||
|
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Write the SQL to migration.sql
|
||||||
|
migration_file = migration_dir / "migration.sql"
|
||||||
|
migration_file.write_text(result.stdout)
|
||||||
|
|
||||||
|
print(f"Created migration in {migration_dir}")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
print("No schema changes detected. Migration not needed.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up: remove temporary migrations directory
|
||||||
|
if temp_migrations_dir.exists():
|
||||||
|
shutil.rmtree(temp_migrations_dir)
|
||||||
|
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error generating migration: {e.stderr}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error creating migration: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# If running directly, can optionally pass migration name as argument
|
||||||
|
import sys
|
||||||
|
|
||||||
|
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
|
||||||
|
create_migration(migration_name)
|
360
deploy/migrations/20250326162113_baseline/migration.sql
Normal file
|
@ -0,0 +1,360 @@
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_BudgetTable" (
|
||||||
|
"budget_id" TEXT NOT NULL,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"soft_budget" DOUBLE PRECISION,
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"model_max_budget" JSONB,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_CredentialsTable" (
|
||||||
|
"credential_id" TEXT NOT NULL,
|
||||||
|
"credential_name" TEXT NOT NULL,
|
||||||
|
"credential_values" JSONB NOT NULL,
|
||||||
|
"credential_info" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ProxyModelTable" (
|
||||||
|
"model_id" TEXT NOT NULL,
|
||||||
|
"model_name" TEXT NOT NULL,
|
||||||
|
"litellm_params" JSONB NOT NULL,
|
||||||
|
"model_info" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_OrganizationTable" (
|
||||||
|
"organization_id" TEXT NOT NULL,
|
||||||
|
"organization_alias" TEXT NOT NULL,
|
||||||
|
"budget_id" TEXT NOT NULL,
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"models" TEXT[],
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ModelTable" (
|
||||||
|
"id" SERIAL NOT NULL,
|
||||||
|
"aliases" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_TeamTable" (
|
||||||
|
"team_id" TEXT NOT NULL,
|
||||||
|
"team_alias" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"admins" TEXT[],
|
||||||
|
"members" TEXT[],
|
||||||
|
"members_with_roles" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"models" TEXT[],
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"blocked" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_id" INTEGER,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_UserTable" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"user_alias" TEXT,
|
||||||
|
"team_id" TEXT,
|
||||||
|
"sso_user_id" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"password" TEXT,
|
||||||
|
"teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"user_role" TEXT,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"user_email" TEXT,
|
||||||
|
"models" TEXT[],
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_VerificationToken" (
|
||||||
|
"token" TEXT NOT NULL,
|
||||||
|
"key_name" TEXT,
|
||||||
|
"key_alias" TEXT,
|
||||||
|
"soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"expires" TIMESTAMP(3),
|
||||||
|
"models" TEXT[],
|
||||||
|
"aliases" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"config" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"user_id" TEXT,
|
||||||
|
"team_id" TEXT,
|
||||||
|
"permissions" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"blocked" BOOLEAN,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_EndUserTable" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"alias" TEXT,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"allowed_model_region" TEXT,
|
||||||
|
"default_model" TEXT,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"blocked" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_Config" (
|
||||||
|
"param_name" TEXT NOT NULL,
|
||||||
|
"param_value" JSONB,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_SpendLogs" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"call_type" TEXT NOT NULL,
|
||||||
|
"api_key" TEXT NOT NULL DEFAULT '',
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"total_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"startTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"endTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"completionStartTime" TIMESTAMP(3),
|
||||||
|
"model" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_id" TEXT DEFAULT '',
|
||||||
|
"model_group" TEXT DEFAULT '',
|
||||||
|
"custom_llm_provider" TEXT DEFAULT '',
|
||||||
|
"api_base" TEXT DEFAULT '',
|
||||||
|
"user" TEXT DEFAULT '',
|
||||||
|
"metadata" JSONB DEFAULT '{}',
|
||||||
|
"cache_hit" TEXT DEFAULT '',
|
||||||
|
"cache_key" TEXT DEFAULT '',
|
||||||
|
"request_tags" JSONB DEFAULT '[]',
|
||||||
|
"team_id" TEXT,
|
||||||
|
"end_user" TEXT,
|
||||||
|
"requester_ip_address" TEXT,
|
||||||
|
"messages" JSONB DEFAULT '{}',
|
||||||
|
"response" JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ErrorLogs" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"startTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"endTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"api_base" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_group" TEXT NOT NULL DEFAULT '',
|
||||||
|
"litellm_model_name" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_id" TEXT NOT NULL DEFAULT '',
|
||||||
|
"request_kwargs" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"exception_type" TEXT NOT NULL DEFAULT '',
|
||||||
|
"exception_string" TEXT NOT NULL DEFAULT '',
|
||||||
|
"status_code" TEXT NOT NULL DEFAULT '',
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_UserNotifications" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"models" TEXT[],
|
||||||
|
"justification" TEXT NOT NULL,
|
||||||
|
"status" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_TeamMembership" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"team_id" TEXT NOT NULL,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_OrganizationMembership" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"organization_id" TEXT NOT NULL,
|
||||||
|
"user_role" TEXT,
|
||||||
|
"spend" DOUBLE PRECISION DEFAULT 0.0,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_InvitationLink" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"is_accepted" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"accepted_at" TIMESTAMP(3),
|
||||||
|
"expires_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_AuditLog" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"changed_by" TEXT NOT NULL DEFAULT '',
|
||||||
|
"changed_by_api_key" TEXT NOT NULL DEFAULT '',
|
||||||
|
"action" TEXT NOT NULL,
|
||||||
|
"table_name" TEXT NOT NULL,
|
||||||
|
"object_id" TEXT NOT NULL,
|
||||||
|
"before_value" JSONB,
|
||||||
|
"updated_values" JSONB,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_DailyUserSpend" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"date" TEXT NOT NULL,
|
||||||
|
"api_key" TEXT NOT NULL,
|
||||||
|
"model" TEXT NOT NULL,
|
||||||
|
"model_group" TEXT,
|
||||||
|
"custom_llm_provider" TEXT,
|
||||||
|
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- AlterTable
|
||||||
|
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;
|
||||||
|
|
deploy/migrations/migration_lock.toml (new file)
@@ -0,0 +1 @@
+provider = "postgresql"
@@ -66,5 +66,3 @@ volumes:
   postgres_data:
     name: litellm_postgres_data # Named volume for Postgres data persistence
-
-# ...rest of your docker-compose config if any
@@ -59,9 +59,6 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
@@ -14,7 +14,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
-    apt-get install -y gcc python3-dev && \
+    apt-get install -y gcc g++ python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
 RUN pip install --no-cache-dir --upgrade pip && \
@@ -56,10 +56,8 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
 # ensure pyjwt is used, not jwt
-RUN pip install redisvl==0.0.7 --no-deps --no-cache-dir && \
-    pip uninstall jwt -y && \
+RUN pip uninstall jwt -y && \
     pip uninstall PyJWT -y && \
     pip install PyJWT==2.9.0 --no-cache-dir
@ -26,7 +26,7 @@ Install redis
|
||||||
pip install redis
|
pip install redis
|
||||||
```
|
```
|
||||||
|
|
||||||
For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
|
For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -91,12 +91,12 @@ response2 = completion(
|
||||||
|
|
||||||
<TabItem value="redis-sem" label="redis-semantic cache">
|
<TabItem value="redis-sem" label="redis-semantic cache">
|
||||||
|
|
||||||
Install redis
|
Install redisvl client
|
||||||
```shell
|
```shell
|
||||||
pip install redisvl==0.0.7
|
pip install redisvl==0.4.1
|
||||||
```
|
```
|
||||||
|
|
||||||
For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
|
For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -114,6 +114,7 @@ litellm.cache = Cache(
|
||||||
port=os.environ["REDIS_PORT"],
|
port=os.environ["REDIS_PORT"],
|
||||||
password=os.environ["REDIS_PASSWORD"],
|
password=os.environ["REDIS_PASSWORD"],
|
||||||
similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
|
similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
|
||||||
|
ttl=120,
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
|
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
|
||||||
)
|
)
|
||||||
response1 = completion(
|
response1 = completion(
|
||||||
|
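For readers skimming this doc diff, here is a compact end-to-end sketch assembled from the updated example above. The `similarity_threshold`, `ttl`, and `redis_semantic_cache_embedding_model` parameters come from the snippet; the `litellm.Cache` entry point, model names, and environment variables are assumptions:

```python
import os
import litellm
from litellm import completion

# Assumes REDIS_HOST / REDIS_PORT / REDIS_PASSWORD and OPENAI_API_KEY are set in the environment
litellm.cache = litellm.Cache(  # top-level Cache export is an assumption
    type="redis-semantic",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
    similarity_threshold=0.8,  # 1 = exact matches only; lower values accept looser matches
    ttl=120,                   # cache entries expire after 120 seconds
    redis_semantic_cache_embedding_model="text-embedding-ada-002",
)

# Two semantically similar prompts; the second call should be served from the cache
response1 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What's the weather like in SF?"}],
)
response2 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "How is the weather in San Francisco?"}],
)
print(response1.choices[0].message.content)
```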
@ -471,11 +472,13 @@ def __init__(
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
namespace: Optional[str] = None,
|
namespace: Optional[str] = None,
|
||||||
default_in_redis_ttl: Optional[float] = None,
|
default_in_redis_ttl: Optional[float] = None,
|
||||||
similarity_threshold: Optional[float] = None,
|
|
||||||
redis_semantic_cache_use_async=False,
|
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002",
|
|
||||||
redis_flush_size=None,
|
redis_flush_size=None,
|
||||||
|
|
||||||
|
# redis semantic cache params
|
||||||
|
similarity_threshold: Optional[float] = None,
|
||||||
|
redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
|
redis_semantic_cache_index_name: Optional[str] = None,
|
||||||
|
|
||||||
# s3 Bucket, boto3 configuration
|
# s3 Bucket, boto3 configuration
|
||||||
s3_bucket_name: Optional[str] = None,
|
s3_bucket_name: Optional[str] = None,
|
||||||
s3_region_name: Optional[str] = None,
|
s3_region_name: Optional[str] = None,
|
||||||
|
|
|
@ -272,14 +272,7 @@ async with stdio_client(server_params) as (read, write):
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Upcoming Features
|
## Advanced Usage
|
||||||
|
|
||||||
:::info
|
|
||||||
|
|
||||||
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
|
|
||||||
### Expose MCP tools on LiteLLM Proxy Server
|
### Expose MCP tools on LiteLLM Proxy Server
|
||||||
|
|
||||||
|
|
|
@ -1776,6 +1776,7 @@ response = completion(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
1. Setup config.yaml
|
1. Setup config.yaml
|
||||||
|
@ -1820,11 +1821,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
### SSO Login (AWS Profile)
|
### SSO Login (AWS Profile)
|
||||||
- Set `AWS_PROFILE` environment variable
|
- Set `AWS_PROFILE` environment variable
|
||||||
- Make bedrock completion call
|
- Make bedrock completion call
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
@ -1917,12 +1920,46 @@ model_list:
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
Text to Image:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Color Guided Generation:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter",
|
||||||
|
"taskType": "COLOR_GUIDED_GENERATION",
|
||||||
|
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|-------------------------|---------------------------------------------|
|
||||||
|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
|
||||||
|
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
|
||||||
|
|
||||||
|
|
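A hedged SDK-level counterpart to the curl examples above; the AWS credentials and region below are placeholders, while the model string matches the table:

```python
import os
from litellm import image_generation

# Assumes valid AWS credentials; the values below are placeholders
os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
os.environ["AWS_REGION_NAME"] = "us-east-1"

# Text-to-image with Amazon Nova Canvas on Bedrock
response = image_generation(
    model="bedrock/amazon.nova-canvas-v1:0",
    prompt="A cute baby sea otter",
)
print(response)
```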
||||||
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
||||||
|
|
||||||
|
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
:::warning
|
:::warning
|
||||||
|
|
||||||
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Experimental - 2024-Jun-23:
|
Experimental - 2024-Jun-23:
|
||||||
|
|
|
@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
## OpenAI Audio Transcription
|
||||||
|
|
||||||
|
LiteLLM supports the OpenAI Audio Transcription endpoint.
|
||||||
|
|
||||||
|
Supported models:
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|---------------------------|-----------------------------------------------------------------|
|
||||||
|
| `whisper-1`              | `response = transcription(model="whisper-1", file=audio_file)` |
|
||||||
|
| `gpt-4o-transcribe`      | `response = transcription(model="gpt-4o-transcribe", file=audio_file)` |
|
||||||
|
| `gpt-4o-mini-transcribe` | `response = transcription(model="gpt-4o-mini-transcribe", file=audio_file)` |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import transcription
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set api keys
|
||||||
|
os.environ["OPENAI_API_KEY"] = ""
|
||||||
|
audio_file = open("/path/to/audio.mp3", "rb")
|
||||||
|
|
||||||
|
response = transcription(model="gpt-4o-transcribe", file=audio_file)
|
||||||
|
|
||||||
|
print(f"response: {response}")
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o-transcribe
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-4o-transcribe
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
model_info:
|
||||||
|
mode: audio_transcription
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
|
||||||
|
--form 'model="gpt-4o-transcribe"'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
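The same proxy endpoint can also be called with the OpenAI Python SDK. The base URL and key mirror the curl example above; the local file path is an assumption:

```python
from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy started above
client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000/v1")

with open("gettysburg.wav", "rb") as audio_file:  # assumed local audio file
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )

print(transcript.text)
```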
||||||
## Advanced
|
## Advanced
|
||||||
|
|
||||||
### Getting OpenAI API Response Headers
|
### Getting OpenAI API Response Headers
|
||||||
|
|
|
@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Gemini Pro
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|------------------|--------------------------------------|
|
||||||
|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||||
|
|
||||||
|
## Fine-tuned Models
|
||||||
|
|
||||||
|
You can call fine-tuned Vertex AI Gemini models through LiteLLM.
|
||||||
|
|
||||||
|
| Property | Details |
|
||||||
|
|----------|---------|
|
||||||
|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
|
||||||
|
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
|
||||||
|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
|
||||||
|
|
||||||
|
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
|
||||||
|
|
||||||
|
```python title="Model parameter for calling fine-tuned gemini models"
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>"
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="LiteLLM Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
## set ENV variables
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
1. Add Vertex Credentials to your env
|
||||||
|
|
||||||
|
```bash title="Authenticate to Vertex AI"
|
||||||
|
!gcloud auth application-default login
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="Add to litellm config"
|
||||||
|
- model_name: finetuned-gemini
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini/<ENDPOINT_ID>
|
||||||
|
vertex_project: <PROJECT_ID>
|
||||||
|
vertex_location: <LOCATION>
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example request"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-litellm-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="finetuned-gemini",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "hi"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Example request"
|
||||||
|
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: <LITELLM_KEY>' \
|
||||||
|
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Model Garden
|
## Model Garden
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
@ -1479,67 +1576,6 @@ response = completion(
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro
|
|
||||||
| Model Name | Function Call |
|
|
||||||
|------------------|--------------------------------------|
|
|
||||||
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
|
||||||
|
|
||||||
## Fine-tuned Models
|
|
||||||
|
|
||||||
Fine tuned models on vertex have a numerical model/endpoint id.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
## set ENV variables
|
|
||||||
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
|
||||||
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add Vertex Credentials to your env
|
|
||||||
|
|
||||||
```bash
|
|
||||||
!gcloud auth application-default login
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
- model_name: finetuned-gemini
|
|
||||||
litellm_params:
|
|
||||||
model: vertex_ai/<ENDPOINT_ID>
|
|
||||||
vertex_project: <PROJECT_ID>
|
|
||||||
vertex_location: <LOCATION>
|
|
||||||
model_info:
|
|
||||||
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: <LITELLM_KEY>' \
|
|
||||||
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro Vision
|
## Gemini Pro Vision
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|
|
|
@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
|
||||||
- Login: `<your-proxy-base-url>/sso/key/generate`
|
- Login: `<your-proxy-base-url>/sso/key/generate`
|
||||||
- Logout: `<your-proxy-base-url>`
|
- Logout: `<your-proxy-base-url>`
|
||||||
|
|
||||||
|
Here's the env var to set the logout url on the proxy
|
||||||
|
```bash
|
||||||
|
PROXY_LOGOUT_URL="https://www.google.com"
|
||||||
|
```
|
||||||
|
|
||||||
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
||||||
|
|
||||||
Set this in your .env (so the proxy can set the correct redirect url)
|
Set this in your .env (so the proxy can set the correct redirect url)
|
||||||
|
|
|
@ -160,7 +160,7 @@ general_settings:
|
||||||
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
||||||
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
|
||||||
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
||||||
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
||||||
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
||||||
|
@ -479,7 +479,7 @@ router_settings:
|
||||||
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
||||||
| PROXY_BASE_URL | Base URL for proxy service
|
| PROXY_BASE_URL | Base URL for proxy service
|
||||||
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
||||||
| PROXY_MASTER_KEY | Master key for proxy authentication
|
| LITELLM_MASTER_KEY | Master key for proxy authentication
|
||||||
| QDRANT_API_BASE | Base URL for Qdrant API
|
| QDRANT_API_BASE | Base URL for Qdrant API
|
||||||
| QDRANT_API_KEY | API key for Qdrant service
|
| QDRANT_API_KEY | API key for Qdrant service
|
||||||
| QDRANT_URL | Connection URL for Qdrant database
|
| QDRANT_URL | Connection URL for Qdrant database
|
||||||
|
|
|
@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
|
||||||
|
|
||||||
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
||||||
|
|
||||||
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability.
|
When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
|
||||||
|
|
||||||
|
|
||||||
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
||||||
|
|
||||||
```yaml
|
#### Configuration
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="litellm config.yaml"
|
||||||
general_settings:
|
general_settings:
|
||||||
allow_requests_on_db_unavailable: True
|
allow_requests_on_db_unavailable: True
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Expected Behavior
|
||||||
|
|
||||||
|
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM will handle errors as follows:
|
||||||
|
|
||||||
|
| Type of Error | Expected Behavior | Details |
|
||||||
|
|---------------|-------------------|----------------|
|
||||||
|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
|
||||||
|
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
|
||||||
|
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
|
||||||
|
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
|
||||||
|
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
|
||||||
|
|
||||||
|
|
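To observe the readiness behavior described in the table above, a small sketch; the proxy URL and port are assumptions based on the default setup, and `requests` is assumed installed:

```python
import requests

# With allow_requests_on_db_unavailable enabled, /health/readiness should keep
# returning 200 OK even while the database is unreachable.
resp = requests.get("http://0.0.0.0:4000/health/readiness", timeout=5)
print(resp.status_code)
print(resp.text)
```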
||||||
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
||||||
|
|
||||||
By default, LiteLLM writes several types of logs to the database:
|
By default, LiteLLM writes several types of logs to the database:
|
||||||
|
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
|
||||||
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Machine Specifications to Deploy LiteLLM
|
|
||||||
|
|
||||||
| Service | Spec | CPUs | Memory | Architecture | Version|
|
|
||||||
| --- | --- | --- | --- | --- | --- |
|
|
||||||
| Server | `t2.small`. | `1vCPUs` | `8GB` | `x86` |
|
|
||||||
| Redis Cache | - | - | - | - | 7.0+ Redis Engine|
|
|
||||||
|
|
||||||
|
|
||||||
### Reference Kubernetes Deployment YAML
|
|
||||||
|
|
||||||
Reference Kubernetes `deployment.yaml` that was load tested by us
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: litellm-deployment
|
|
||||||
spec:
|
|
||||||
replicas: 3
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: litellm
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: litellm
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: litellm-container
|
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
env:
|
|
||||||
- name: AZURE_API_KEY
|
|
||||||
value: "d6******"
|
|
||||||
- name: AZURE_API_BASE
|
|
||||||
value: "https://ope******"
|
|
||||||
- name: LITELLM_MASTER_KEY
|
|
||||||
value: "sk-1234"
|
|
||||||
- name: DATABASE_URL
|
|
||||||
value: "po**********"
|
|
||||||
args:
|
|
||||||
- "--config"
|
|
||||||
- "/app/proxy_config.yaml" # Update the path to mount the config file
|
|
||||||
volumeMounts: # Define volume mount for proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /app
|
|
||||||
readOnly: true
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/liveliness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/readiness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
volumes: # Define volume to mount proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: litellm-config
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Reference Kubernetes `service.yaml` that was load tested by us
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: litellm-service
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: litellm
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 4000
|
|
||||||
targetPort: 4000
|
|
||||||
type: LoadBalancer
|
|
||||||
```
|
|
||||||
|
|
|
@ -188,7 +188,13 @@ Currently implemented for:
|
||||||
- OpenAI (if OPENAI_API_KEY is set)
|
- OpenAI (if OPENAI_API_KEY is set)
|
||||||
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
||||||
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
||||||
|
- Gemini (if GEMINI_API_KEY is set)
|
||||||
|
- XAI (if XAI_API_KEY is set)
|
||||||
|
- Anthropic (if ANTHROPIC_API_KEY is set)
|
||||||
|
|
||||||
|
You can also specify a custom provider to check:
|
||||||
|
|
||||||
|
**All providers**:
|
||||||
```python
|
```python
|
||||||
from litellm import get_valid_models
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
|
||||||
print(valid_models)
|
print(valid_models)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Specific provider**:
|
||||||
|
```python
|
||||||
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
|
||||||
|
print(valid_models)
|
||||||
|
```
|
||||||
|
|
||||||
### `validate_environment(model: str)`
|
### `validate_environment(model: str)`
|
||||||
|
|
||||||
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
||||||
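A brief sketch of this helper in use; the model name is arbitrary and the exact shape of the returned dict is an assumption based on typical usage:

```python
from litellm import validate_environment

# Check whether the environment has everything needed to call an Anthropic model
result = validate_environment(model="claude-3-5-sonnet-20240620")
print(result)
# expected shape (assumption): {"keys_in_environment": False, "missing_keys": ["ANTHROPIC_API_KEY"]}
```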
|
|
BIN
docs/my-website/img/release_notes/team_model_add.png
Normal file
After Width: | Height: | Size: 70 KiB |
|
@ -24,6 +24,7 @@ This release brings:
|
||||||
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
||||||
- Perf improvements for Usage-based Routing
|
- Perf improvements for Usage-based Routing
|
||||||
- Streaming guardrail support via websockets
|
- Streaming guardrail support via websockets
|
||||||
|
- Azure OpenAI client perf fix (from previous release)
|
||||||
|
|
||||||
## Docker Run LiteLLM Proxy
|
## Docker Run LiteLLM Proxy
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ This release brings:
|
||||||
docker run
|
docker run
|
||||||
-e STORE_MODEL_IN_DB=True
|
-e STORE_MODEL_IN_DB=True
|
||||||
-p 4000:4000
|
-p 4000:4000
|
||||||
ghcr.io/berriai/litellm:main-v1.63.14-stable
|
ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Demo Instance
|
## Demo Instance
|
||||||
|
|
34
docs/my-website/release_notes/v1.65.0/index.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0 - Team Model Add - update
|
||||||
|
slug: v1.65.0
|
||||||
|
date: 2025-03-28T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [management endpoints, team models, ui]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
|
||||||
|
|
||||||
|
This means that only proxy admins or team admins can create team models.
|
||||||
|
|
||||||
|
## Additional Changes
|
||||||
|
|
||||||
|
- Allows team admins to call `/model/update` to update team models.
|
||||||
|
- Allows team admins to call `/model/delete` to delete team models.
|
||||||
|
- Introduces new `user_models_only` param to `/v2/model/info` - only return models added by this user.
|
||||||
|
|
||||||
|
|
||||||
|
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/team_model_add.png')} />
|
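For illustration, a hedged sketch of the new `user_models_only` param on `/v2/model/info`; the proxy URL and key are assumptions, and `requests` is assumed installed:

```python
import requests

# Only return models added by the calling user
resp = requests.get(
    "http://0.0.0.0:4000/v2/model/info",
    params={"user_models_only": "true"},
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
print(resp.status_code)
print(resp.json())
```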
|
@ -304,6 +304,7 @@ const sidebars = {
|
||||||
"image_variations",
|
"image_variations",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"mcp",
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "/audio",
|
label: "/audio",
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
|
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
|
||||||
### INIT VARIABLES ##########
|
### INIT VARIABLES ###########
|
||||||
import threading
|
import threading
|
||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
||||||
|
@ -122,6 +122,9 @@ langsmith_batch_size: Optional[int] = None
|
||||||
prometheus_initialize_budget_metrics: Optional[bool] = False
|
prometheus_initialize_budget_metrics: Optional[bool] = False
|
||||||
argilla_batch_size: Optional[int] = None
|
argilla_batch_size: Optional[int] = None
|
||||||
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
|
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
|
||||||
|
gcs_pub_sub_use_v1: Optional[bool] = (
|
||||||
|
False # if you want to use v1 gcs pubsub logged payload
|
||||||
|
)
|
||||||
argilla_transformation_object: Optional[Dict[str, Any]] = None
|
argilla_transformation_object: Optional[Dict[str, Any]] = None
|
||||||
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
|
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
|
||||||
[]
|
[]
|
||||||
|
@ -810,6 +813,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
|
||||||
from .llms.maritalk import MaritalkConfig
|
from .llms.maritalk import MaritalkConfig
|
||||||
from .llms.openrouter.chat.transformation import OpenrouterConfig
|
from .llms.openrouter.chat.transformation import OpenrouterConfig
|
||||||
from .llms.anthropic.chat.transformation import AnthropicConfig
|
from .llms.anthropic.chat.transformation import AnthropicConfig
|
||||||
|
from .llms.anthropic.common_utils import AnthropicModelInfo
|
||||||
from .llms.groq.stt.transformation import GroqSTTConfig
|
from .llms.groq.stt.transformation import GroqSTTConfig
|
||||||
from .llms.anthropic.completion.transformation import AnthropicTextConfig
|
from .llms.anthropic.completion.transformation import AnthropicTextConfig
|
||||||
from .llms.triton.completion.transformation import TritonConfig
|
from .llms.triton.completion.transformation import TritonConfig
|
||||||
|
@ -845,6 +849,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
||||||
VertexGeminiConfig,
|
VertexGeminiConfig,
|
||||||
VertexGeminiConfig as VertexAIConfig,
|
VertexGeminiConfig as VertexAIConfig,
|
||||||
)
|
)
|
||||||
|
from .llms.gemini.common_utils import GeminiModelInfo
|
||||||
from .llms.gemini.chat.transformation import (
|
from .llms.gemini.chat.transformation import (
|
||||||
GoogleAIStudioGeminiConfig,
|
GoogleAIStudioGeminiConfig,
|
||||||
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
|
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
|
||||||
|
@ -947,6 +952,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
|
||||||
from .llms.openai.chat.gpt_transformation import (
|
from .llms.openai.chat.gpt_transformation import (
|
||||||
OpenAIGPTConfig,
|
OpenAIGPTConfig,
|
||||||
)
|
)
|
||||||
|
from .llms.openai.transcriptions.whisper_transformation import (
|
||||||
|
OpenAIWhisperAudioTranscriptionConfig,
|
||||||
|
)
|
||||||
|
from .llms.openai.transcriptions.gpt_transformation import (
|
||||||
|
OpenAIGPTAudioTranscriptionConfig,
|
||||||
|
)
|
||||||
|
|
||||||
openAIGPTConfig = OpenAIGPTConfig()
|
openAIGPTConfig = OpenAIGPTConfig()
|
||||||
from .llms.openai.chat.gpt_audio_transformation import (
|
from .llms.openai.chat.gpt_audio_transformation import (
|
||||||
|
@ -975,6 +986,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
|
||||||
from .llms.friendliai.chat.transformation import FriendliaiChatConfig
|
from .llms.friendliai.chat.transformation import FriendliaiChatConfig
|
||||||
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
|
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
|
||||||
from .llms.xai.chat.transformation import XAIChatConfig
|
from .llms.xai.chat.transformation import XAIChatConfig
|
||||||
|
from .llms.xai.common_utils import XAIModelInfo
|
||||||
from .llms.volcengine import VolcEngineConfig
|
from .llms.volcengine import VolcEngineConfig
|
||||||
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
|
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
|
||||||
from .llms.azure.azure import (
|
from .llms.azure.azure import (
|
||||||
|
|
|
@ -88,16 +88,16 @@ class Cache:
|
||||||
s3_aws_session_token: Optional[str] = None,
|
s3_aws_session_token: Optional[str] = None,
|
||||||
s3_config: Optional[Any] = None,
|
s3_config: Optional[Any] = None,
|
||||||
s3_path: Optional[str] = None,
|
s3_path: Optional[str] = None,
|
||||||
redis_semantic_cache_use_async=False,
|
redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002",
|
redis_semantic_cache_index_name: Optional[str] = None,
|
||||||
redis_flush_size: Optional[int] = None,
|
redis_flush_size: Optional[int] = None,
|
||||||
redis_startup_nodes: Optional[List] = None,
|
redis_startup_nodes: Optional[List] = None,
|
||||||
disk_cache_dir=None,
|
disk_cache_dir: Optional[str] = None,
|
||||||
qdrant_api_base: Optional[str] = None,
|
qdrant_api_base: Optional[str] = None,
|
||||||
qdrant_api_key: Optional[str] = None,
|
qdrant_api_key: Optional[str] = None,
|
||||||
qdrant_collection_name: Optional[str] = None,
|
qdrant_collection_name: Optional[str] = None,
|
||||||
qdrant_quantization_config: Optional[str] = None,
|
qdrant_quantization_config: Optional[str] = None,
|
||||||
qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
|
qdrant_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -170,8 +170,8 @@ class Cache:
|
||||||
port=port,
|
port=port,
|
||||||
password=password,
|
password=password,
|
||||||
similarity_threshold=similarity_threshold,
|
similarity_threshold=similarity_threshold,
|
||||||
use_async=redis_semantic_cache_use_async,
|
|
||||||
embedding_model=redis_semantic_cache_embedding_model,
|
embedding_model=redis_semantic_cache_embedding_model,
|
||||||
|
index_name=redis_semantic_cache_index_name,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
elif type == LiteLLMCacheType.QDRANT_SEMANTIC:
|
elif type == LiteLLMCacheType.QDRANT_SEMANTIC:
|
||||||
|
|
|
@ -1,271 +1,284 @@
|
||||||
"""
|
"""
|
||||||
Redis Semantic Cache implementation
|
Redis Semantic Cache implementation for LiteLLM
|
||||||
|
|
||||||
Has 4 methods:
|
The RedisSemanticCache provides semantic caching functionality using Redis as a backend.
|
||||||
- set_cache
|
This cache stores responses based on the semantic similarity of prompts rather than
|
||||||
- get_cache
|
exact matching, allowing for more flexible caching of LLM responses.
|
||||||
- async_set_cache
|
|
||||||
- async_get_cache
|
This implementation uses RedisVL's SemanticCache to find semantically similar prompts
|
||||||
|
and their cached responses.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from typing import Any
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import print_verbose
|
from litellm._logging import print_verbose
|
||||||
|
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages
|
||||||
from .base_cache import BaseCache
|
from .base_cache import BaseCache
|
||||||
|
|
||||||
|
|
||||||
class RedisSemanticCache(BaseCache):
|
class RedisSemanticCache(BaseCache):
|
||||||
|
"""
|
||||||
|
Redis-backed semantic cache for LLM responses.
|
||||||
|
|
||||||
|
This cache uses vector similarity to find semantically similar prompts that have been
|
||||||
|
previously sent to the LLM, allowing for cache hits even when prompts are not identical
|
||||||
|
but carry similar meaning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DEFAULT_REDIS_INDEX_NAME: str = "litellm_semantic_cache_index"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
host=None,
|
host: Optional[str] = None,
|
||||||
port=None,
|
port: Optional[str] = None,
|
||||||
password=None,
|
password: Optional[str] = None,
|
||||||
redis_url=None,
|
redis_url: Optional[str] = None,
|
||||||
similarity_threshold=None,
|
similarity_threshold: Optional[float] = None,
|
||||||
use_async=False,
|
embedding_model: str = "text-embedding-ada-002",
|
||||||
embedding_model="text-embedding-ada-002",
|
index_name: Optional[str] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
from redisvl.index import SearchIndex
|
|
||||||
|
|
||||||
print_verbose(
|
|
||||||
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
|
|
||||||
)
|
|
||||||
if similarity_threshold is None:
|
|
||||||
raise Exception("similarity_threshold must be provided, passed None")
|
|
||||||
self.similarity_threshold = similarity_threshold
|
|
||||||
self.embedding_model = embedding_model
|
|
||||||
schema = {
|
|
||||||
"index": {
|
|
||||||
"name": "litellm_semantic_cache_index",
|
|
||||||
"prefix": "litellm",
|
|
||||||
"storage_type": "hash",
|
|
||||||
},
|
|
||||||
"fields": {
|
|
||||||
"text": [{"name": "response"}],
|
|
||||||
"vector": [
|
|
||||||
{
|
|
||||||
"name": "litellm_embedding",
|
|
||||||
"dims": 1536,
|
|
||||||
"distance_metric": "cosine",
|
|
||||||
"algorithm": "flat",
|
|
||||||
"datatype": "float32",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if redis_url is None:
|
|
||||||
# if no url passed, check if host, port and password are passed, if not raise an Exception
|
|
||||||
if host is None or port is None or password is None:
|
|
||||||
# try checking env for host, port and password
|
|
||||||
import os
|
|
||||||
|
|
||||||
host = os.getenv("REDIS_HOST")
|
|
||||||
port = os.getenv("REDIS_PORT")
|
|
||||||
password = os.getenv("REDIS_PASSWORD")
|
|
||||||
if host is None or port is None or password is None:
|
|
||||||
raise Exception("Redis host, port, and password must be provided")
|
|
||||||
|
|
||||||
redis_url = "redis://:" + password + "@" + host + ":" + port
|
|
||||||
print_verbose(f"redis semantic-cache redis_url: {redis_url}")
|
|
||||||
if use_async is False:
|
|
||||||
self.index = SearchIndex.from_dict(schema)
|
|
||||||
self.index.connect(redis_url=redis_url)
|
|
||||||
try:
|
|
||||||
self.index.create(overwrite=False) # don't overwrite existing index
|
|
||||||
except Exception as e:
|
|
||||||
print_verbose(f"Got exception creating semantic cache index: {str(e)}")
|
|
||||||
elif use_async is True:
|
|
||||||
schema["index"]["name"] = "litellm_semantic_cache_index_async"
|
|
||||||
self.index = SearchIndex.from_dict(schema)
|
|
||||||
self.index.connect(redis_url=redis_url, use_async=True)
|
|
||||||
|
|
||||||
#
|
|
||||||
def _get_cache_logic(self, cached_response: Any):
|
|
||||||
"""
|
"""
|
||||||
Common 'get_cache_logic' across sync + async redis client implementations
|
Initialize the Redis Semantic Cache.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
host: Redis host address
|
||||||
|
port: Redis port
|
||||||
|
password: Redis password
|
||||||
|
redis_url: Full Redis URL (alternative to separate host/port/password)
|
||||||
|
similarity_threshold: Threshold for semantic similarity (0.0 to 1.0)
|
||||||
|
where 1.0 requires exact matches and 0.0 accepts any match
|
||||||
|
embedding_model: Model to use for generating embeddings
|
||||||
|
index_name: Name for the Redis index
|
||||||
|
ttl: Default time-to-live for cache entries in seconds
|
||||||
|
**kwargs: Additional arguments passed to the Redis client
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If similarity_threshold is not provided or required Redis
|
||||||
|
connection information is missing
|
||||||
|
"""
|
||||||
|
from redisvl.extensions.llmcache import SemanticCache
|
||||||
|
from redisvl.utils.vectorize import CustomTextVectorizer
|
||||||
|
|
||||||
|
if index_name is None:
|
||||||
|
index_name = self.DEFAULT_REDIS_INDEX_NAME
|
||||||
|
|
||||||
|
print_verbose(f"Redis semantic-cache initializing index - {index_name}")
|
||||||
|
|
||||||
|
# Validate similarity threshold
|
||||||
|
if similarity_threshold is None:
|
||||||
|
raise ValueError("similarity_threshold must be provided, passed None")
|
||||||
|
|
||||||
|
# Store configuration
|
||||||
|
self.similarity_threshold = similarity_threshold
|
||||||
|
|
||||||
|
# Convert similarity threshold [0,1] to distance threshold [0,2]
|
||||||
|
# For cosine distance: 0 = most similar, 2 = least similar
|
||||||
|
# While similarity: 1 = most similar, 0 = least similar
|
||||||
|
self.distance_threshold = 1 - similarity_threshold
|
||||||
|
self.embedding_model = embedding_model
|
||||||
|
|
||||||
|
# Set up Redis connection
|
||||||
|
if redis_url is None:
|
||||||
|
try:
|
||||||
|
# Attempt to use provided parameters or fallback to environment variables
|
||||||
|
host = host or os.environ['REDIS_HOST']
|
||||||
|
port = port or os.environ['REDIS_PORT']
|
||||||
|
password = password or os.environ['REDIS_PASSWORD']
|
||||||
|
except KeyError as e:
|
||||||
|
# Raise a more informative exception if any of the required keys are missing
|
||||||
|
missing_var = e.args[0]
|
||||||
|
raise ValueError(f"Missing required Redis configuration: {missing_var}. "
|
||||||
|
f"Provide {missing_var} or redis_url.") from e
|
||||||
|
|
||||||
|
redis_url = f"redis://:{password}@{host}:{port}"
|
||||||
|
|
||||||
|
print_verbose(f"Redis semantic-cache redis_url: {redis_url}")
|
||||||
|
|
||||||
|
# Initialize the Redis vectorizer and cache
|
||||||
|
cache_vectorizer = CustomTextVectorizer(self._get_embedding)
|
||||||
|
|
||||||
|
self.llmcache = SemanticCache(
|
||||||
|
name=index_name,
|
||||||
|
redis_url=redis_url,
|
||||||
|
vectorizer=cache_vectorizer,
|
||||||
|
distance_threshold=self.distance_threshold,
|
||||||
|
overwrite=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_ttl(self, **kwargs) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Get the TTL (time-to-live) value for cache entries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
**kwargs: Keyword arguments that may contain a custom TTL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Optional[int]: The TTL value in seconds, or None if no TTL should be applied
|
||||||
|
"""
|
||||||
|
ttl = kwargs.get("ttl")
|
||||||
|
if ttl is not None:
|
||||||
|
ttl = int(ttl)
|
||||||
|
return ttl
|
||||||
|
|
||||||
|
def _get_embedding(self, prompt: str) -> List[float]:
|
||||||
|
"""
|
||||||
|
Generate an embedding vector for the given prompt using the configured embedding model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The text to generate an embedding for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[float]: The embedding vector
|
||||||
|
"""
|
||||||
|
# Create an embedding from prompt
|
||||||
|
embedding_response = litellm.embedding(
|
||||||
|
model=self.embedding_model,
|
||||||
|
input=prompt,
|
||||||
|
cache={"no-store": True, "no-cache": True},
|
||||||
|
)
|
||||||
|
embedding = embedding_response["data"][0]["embedding"]
|
||||||
|
return embedding
|
||||||
|
|
||||||
|
def _get_cache_logic(self, cached_response: Any) -> Any:
|
||||||
|
"""
|
||||||
|
Process the cached response to prepare it for use.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
cached_response: The raw cached response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The processed cache response, or None if input was None
|
||||||
"""
|
"""
|
||||||
if cached_response is None:
|
if cached_response is None:
|
||||||
return cached_response
|
return cached_response
|
||||||
|
|
||||||
# check if cached_response is bytes
|
# Convert bytes to string if needed
|
||||||
if isinstance(cached_response, bytes):
|
if isinstance(cached_response, bytes):
|
||||||
cached_response = cached_response.decode("utf-8")
|
cached_response = cached_response.decode("utf-8")
|
||||||
|
|
||||||
|
# Convert string representation to Python object
|
||||||
|
try:
|
||||||
|
cached_response = json.loads(cached_response)
|
||||||
|
except json.JSONDecodeError:
|
||||||
try:
|
try:
|
||||||
cached_response = json.loads(
|
|
||||||
cached_response
|
|
||||||
) # Convert string to dictionary
|
|
||||||
except Exception:
|
|
||||||
cached_response = ast.literal_eval(cached_response)
|
cached_response = ast.literal_eval(cached_response)
|
||||||
|
except (ValueError, SyntaxError) as e:
|
||||||
|
print_verbose(f"Error parsing cached response: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
return cached_response
|
return cached_response
|
||||||
|
|
||||||
def set_cache(self, key, value, **kwargs):
|
def set_cache(self, key: str, value: Any, **kwargs) -> None:
|
||||||
import numpy as np
|
"""
|
||||||
|
Store a value in the semantic cache.
|
||||||
|
|
||||||
print_verbose(f"redis semantic-cache set_cache, kwargs: {kwargs}")
|
Args:
|
||||||
|
key: The cache key (not directly used in semantic caching)
|
||||||
# get the prompt
|
value: The response value to cache
|
||||||
messages = kwargs["messages"]
|
**kwargs: Additional arguments including 'messages' for the prompt
|
||||||
prompt = "".join(message["content"] for message in messages)
|
and optional 'ttl' for time-to-live
|
||||||
|
"""
|
||||||
# create an embedding for prompt
|
print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
|
||||||
embedding_response = litellm.embedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
# make the embedding a numpy array, convert to bytes
|
|
||||||
embedding_bytes = np.array(embedding, dtype=np.float32).tobytes()
|
|
||||||
value = str(value)
|
|
||||||
assert isinstance(value, str)
|
|
||||||
|
|
||||||
new_data = [
|
|
||||||
{"response": value, "prompt": prompt, "litellm_embedding": embedding_bytes}
|
|
||||||
]
|
|
||||||
|
|
||||||
# Add more data
|
|
||||||
self.index.load(new_data)
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def get_cache(self, key, **kwargs):
|
|
||||||
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
|
|
||||||
from redisvl.query import VectorQuery
|
|
||||||
|
|
||||||
# query
|
|
||||||
# get the messages
|
|
||||||
messages = kwargs["messages"]
|
|
||||||
prompt = "".join(message["content"] for message in messages)
|
|
||||||
|
|
||||||
# convert to embedding
|
|
||||||
embedding_response = litellm.embedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
query = VectorQuery(
|
|
||||||
vector=embedding,
|
|
||||||
vector_field_name="litellm_embedding",
|
|
||||||
return_fields=["response", "prompt", "vector_distance"],
|
|
||||||
num_results=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
results = self.index.query(query)
|
|
||||||
if results is None:
|
|
||||||
return None
|
|
||||||
if isinstance(results, list):
|
|
||||||
if len(results) == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
vector_distance = results[0]["vector_distance"]
|
|
||||||
vector_distance = float(vector_distance)
|
|
||||||
similarity = 1 - vector_distance
|
|
||||||
cached_prompt = results[0]["prompt"]
|
|
||||||
|
|
||||||
# check similarity, if more than self.similarity_threshold, return results
|
|
||||||
print_verbose(
|
|
||||||
f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}"
|
|
||||||
)
|
|
||||||
if similarity > self.similarity_threshold:
|
|
||||||
# cache hit !
|
|
||||||
cached_value = results[0]["response"]
|
|
||||||
print_verbose(
|
|
||||||
f"got a cache hit, similarity: {similarity}, Current prompt: {prompt}, cached_prompt: {cached_prompt}"
|
|
||||||
)
|
|
||||||
return self._get_cache_logic(cached_response=cached_value)
|
|
||||||
else:
|
|
||||||
# cache miss !
|
|
||||||
return None
|
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def async_set_cache(self, key, value, **kwargs):
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from litellm.proxy.proxy_server import llm_model_list, llm_router
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.index.acreate(overwrite=False) # don't overwrite existing index
|
# Extract the prompt from messages
|
||||||
except Exception as e:
|
messages = kwargs.get("messages", [])
|
||||||
print_verbose(f"Got exception creating semantic cache index: {str(e)}")
|
if not messages:
|
||||||
print_verbose(f"async redis semantic-cache set_cache, kwargs: {kwargs}")
|
print_verbose("No messages provided for semantic caching")
|
||||||
|
|
||||||
# get the prompt
|
|
||||||
messages = kwargs["messages"]
|
|
||||||
prompt = "".join(message["content"] for message in messages)
|
|
||||||
# create an embedding for prompt
|
|
||||||
router_model_names = (
|
|
||||||
[m["model_name"] for m in llm_model_list]
|
|
||||||
if llm_model_list is not None
|
|
||||||
else []
|
|
||||||
)
|
|
||||||
if llm_router is not None and self.embedding_model in router_model_names:
|
|
||||||
user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
|
|
||||||
embedding_response = await llm_router.aembedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
metadata={
|
|
||||||
"user_api_key": user_api_key,
|
|
||||||
"semantic-cache-embedding": True,
|
|
||||||
"trace_id": kwargs.get("metadata", {}).get("trace_id", None),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# convert to embedding
|
|
||||||
embedding_response = await litellm.aembedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
# make the embedding a numpy array, convert to bytes
|
|
||||||
embedding_bytes = np.array(embedding, dtype=np.float32).tobytes()
|
|
||||||
value = str(value)
|
|
||||||
            prompt = get_str_from_messages(messages)
            value_str = str(value)

            # Get TTL and store in Redis semantic cache
            ttl = self._get_ttl(**kwargs)
            if ttl is not None:
                self.llmcache.store(prompt, value_str, ttl=int(ttl))
            else:
                self.llmcache.store(prompt, value_str)
        except Exception as e:
            print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}")

    def get_cache(self, key: str, **kwargs) -> Any:
        """
        Retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        """
        print_verbose(f"Redis semantic-cache get_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic cache lookup")
                return None

            prompt = get_str_from_messages(messages)
            # Check the cache for semantically similar prompts
            results = self.llmcache.check(prompt=prompt)

            # Return None if no similar prompts found
            if not results:
                return None

            # Process the best matching result
            cache_hit = results[0]
            vector_distance = float(cache_hit["vector_distance"])

            # Convert vector distance back to similarity score
            # For cosine distance: 0 = most similar, 2 = least similar
            # While similarity: 1 = most similar, 0 = least similar
            similarity = 1 - vector_distance

            cached_prompt = cache_hit["prompt"]
            cached_response = cache_hit["response"]

            print_verbose(
                f"Cache hit: similarity threshold: {self.similarity_threshold}, "
                f"actual similarity: {similarity}, "
                f"current prompt: {prompt}, "
                f"cached prompt: {cached_prompt}"
            )

            return self._get_cache_logic(cached_response=cached_response)
        except Exception as e:
            print_verbose(f"Error retrieving from Redis semantic cache: {str(e)}")

    async def _get_async_embedding(self, prompt: str, **kwargs) -> List[float]:
        """
        Asynchronously generate an embedding for the given prompt.

        Args:
            prompt: The text to generate an embedding for
            **kwargs: Additional arguments that may contain metadata

        Returns:
            List[float]: The embedding vector
        """
        from litellm.proxy.proxy_server import llm_model_list, llm_router

        # Route the embedding request through the proxy if appropriate
        router_model_names = (
            [m["model_name"] for m in llm_model_list]
            if llm_model_list is not None
            else []
        )

        try:
            if llm_router is not None and self.embedding_model in router_model_names:
                # Use the router for embedding generation
                user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
                embedding_response = await llm_router.aembedding(
                    model=self.embedding_model,
@ -278,60 +291,147 @@ class RedisSemanticCache(BaseCache):
                    },
                )
            else:
                # Generate embedding directly
                embedding_response = await litellm.aembedding(
                    model=self.embedding_model,
                    input=prompt,
                    cache={"no-store": True, "no-cache": True},
                )

            # Extract and return the embedding vector
            return embedding_response["data"][0]["embedding"]
        except Exception as e:
            print_verbose(f"Error generating async embedding: {str(e)}")
            raise ValueError(f"Failed to generate embedding: {str(e)}") from e

    async def async_set_cache(self, key: str, value: Any, **kwargs) -> None:
        """
        Asynchronously store a value in the semantic cache.

        Args:
            key: The cache key (not directly used in semantic caching)
            value: The response value to cache
            **kwargs: Additional arguments including 'messages' for the prompt
                and optional 'ttl' for time-to-live
        """
        print_verbose(f"Async Redis semantic-cache set_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic caching")
                return

            prompt = get_str_from_messages(messages)
            value_str = str(value)

            # Generate embedding for the value (response) to cache
            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)

            # Get TTL and store in Redis semantic cache
            ttl = self._get_ttl(**kwargs)
            if ttl is not None:
                await self.llmcache.astore(
                    prompt,
                    value_str,
                    vector=prompt_embedding,  # Pass through custom embedding
                    ttl=ttl
                )
            else:
                await self.llmcache.astore(
                    prompt,
                    value_str,
                    vector=prompt_embedding  # Pass through custom embedding
                )
        except Exception as e:
            print_verbose(f"Error in async_set_cache: {str(e)}")

    async def async_get_cache(self, key: str, **kwargs) -> Any:
        """
        Asynchronously retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        """
        print_verbose(f"Async Redis semantic-cache get_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic cache lookup")
                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
                return None

            prompt = get_str_from_messages(messages)
            # Generate embedding for the prompt
            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)

            # Check the cache for semantically similar prompts
            results = await self.llmcache.acheck(
                prompt=prompt,
                vector=prompt_embedding
            )

            # handle results / cache hit
            if not results:
                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0  # TODO why here but not above??
                return None

            cache_hit = results[0]
            vector_distance = float(cache_hit["vector_distance"])

            # Convert vector distance back to similarity
            # For cosine distance: 0 = most similar, 2 = least similar
            # While similarity: 1 = most similar, 0 = least similar
            similarity = 1 - vector_distance

            cached_prompt = cache_hit["prompt"]
            cached_response = cache_hit["response"]

            # update kwargs["metadata"] with similarity, don't rewrite the original metadata
            kwargs.setdefault("metadata", {})["semantic-similarity"] = similarity

            print_verbose(
                f"Cache hit: similarity threshold: {self.similarity_threshold}, "
                f"actual similarity: {similarity}, "
                f"current prompt: {prompt}, "
                f"cached prompt: {cached_prompt}"
            )

            return self._get_cache_logic(cached_response=cached_response)
        except Exception as e:
            print_verbose(f"Error in async_get_cache: {str(e)}")
            kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0

    async def _index_info(self) -> Dict[str, Any]:
        """
        Get information about the Redis index.

        Returns:
            Dict[str, Any]: Information about the Redis index
        """
        aindex = await self.llmcache._get_async_index()
        return await aindex.info()

    async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None:
        """
        Asynchronously store multiple values in the semantic cache.

        Args:
            cache_list: List of (key, value) tuples to cache
            **kwargs: Additional arguments
        """
        try:
            tasks = []
            for val in cache_list:
                tasks.append(self.async_set_cache(val[0], val[1], **kwargs))
            await asyncio.gather(*tasks)
        except Exception as e:
            print_verbose(f"Error in async_set_cache_pipeline: {str(e)}")
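
For reference, a minimal sketch (not part of the diff) of the distance-to-similarity conversion used in the cache lookups above, assuming cosine vector distance in [0, 2] and an illustrative threshold of 0.8; the underlying semantic cache applies its own configured threshold when checking:

    # Illustrative only: mirrors the conversion done in get_cache/async_get_cache.
    def is_semantic_hit(vector_distance: float, similarity_threshold: float = 0.8) -> bool:
        # cosine distance: 0 = most similar, 2 = least similar
        similarity = 1 - vector_distance
        return similarity >= similarity_threshold

    print(is_semantic_hit(0.15))  # True  -> treated as a cache hit
    print(is_semantic_hit(0.90))  # False -> cache miss
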
@ -275,15 +275,13 @@ def cost_per_token(  # noqa: PLR0915
            custom_llm_provider=custom_llm_provider,
            prompt_characters=prompt_characters,
            completion_characters=completion_characters,
-           prompt_tokens=prompt_tokens,
-           completion_tokens=completion_tokens,
+           usage=usage_block,
        )
    elif cost_router == "cost_per_token":
        return google_cost_per_token(
            model=model_without_prefix,
            custom_llm_provider=custom_llm_provider,
-           prompt_tokens=prompt_tokens,
-           completion_tokens=completion_tokens,
+           usage=usage_block,
        )
    elif custom_llm_provider == "anthropic":
        return anthropic_cost_per_token(model=model, usage=usage_block)
@ -828,11 +826,14 @@ def get_response_cost_from_hidden_params(
        _hidden_params_dict = hidden_params

    additional_headers = _hidden_params_dict.get("additional_headers", {})
-   if additional_headers and "x-litellm-response-cost" in additional_headers:
-       response_cost = additional_headers["x-litellm-response-cost"]
+   if (
+       additional_headers
+       and "llm_provider-x-litellm-response-cost" in additional_headers
+   ):
+       response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
        if response_cost is None:
            return None
-       return float(additional_headers["x-litellm-response-cost"])
+       return float(additional_headers["llm_provider-x-litellm-response-cost"])
    return None
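
A minimal sketch (not from the diff) of how the renamed header is read back out, assuming the hidden params carry provider headers as shown above; the cost value is made up:

    hidden_params = {
        "additional_headers": {"llm_provider-x-litellm-response-cost": "0.00042"}
    }
    additional_headers = hidden_params.get("additional_headers", {})
    if additional_headers and "llm_provider-x-litellm-response-cost" in additional_headers:
        # Same lookup the helper performs before falling back to None.
        print(float(additional_headers["llm_provider-x-litellm-response-cost"]))  # 0.00042
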
@ -10,13 +10,16 @@ import asyncio
import json
import os
import traceback
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+
+from litellm.types.utils import StandardLoggingPayload

if TYPE_CHECKING:
    from litellm.proxy._types import SpendLogsPayload
else:
    SpendLogsPayload = Any

+import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
@ -61,7 +64,7 @@ class GcsPubSubLogger(CustomBatchLogger):
        self.flush_lock = asyncio.Lock()
        super().__init__(**kwargs, flush_lock=self.flush_lock)
        asyncio.create_task(self.periodic_flush())
-       self.log_queue: List[SpendLogsPayload] = []
+       self.log_queue: List[Union[SpendLogsPayload, StandardLoggingPayload]] = []

    async def construct_request_headers(self) -> Dict[str, str]:
        """Construct authorization headers using Vertex AI auth"""
@ -115,6 +118,10 @@ class GcsPubSubLogger(CustomBatchLogger):
        verbose_logger.debug(
            "PubSub: Logging - Enters logging function for model %s", kwargs
        )
+       standard_logging_payload = kwargs.get("standard_logging_object", None)
+
+       # Backwards compatibility with old logging payload
+       if litellm.gcs_pub_sub_use_v1 is True:
            spend_logs_payload = get_logging_payload(
                kwargs=kwargs,
                response_obj=response_obj,
@ -122,6 +129,9 @@ class GcsPubSubLogger(CustomBatchLogger):
                end_time=end_time,
            )
            self.log_queue.append(spend_logs_payload)
+       else:
+           # New logging payload, StandardLoggingPayload
+           self.log_queue.append(standard_logging_payload)

        if len(self.log_queue) >= self.batch_size:
            await self.async_send_batch()
@ -155,7 +165,7 @@ class GcsPubSubLogger(CustomBatchLogger):
        self.log_queue.clear()

    async def publish_message(
-       self, message: SpendLogsPayload
+       self, message: Union[SpendLogsPayload, StandardLoggingPayload]
    ) -> Optional[Dict[str, Any]]:
        """
        Publish message to Google Cloud Pub/Sub using REST API
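
A short sketch of the compatibility switch above; the assumption here is that leaving the flag unset routes logs through the new StandardLoggingPayload branch, while setting it restores the legacy format:

    import litellm

    # Opt back into the legacy SpendLogsPayload format for GCS Pub/Sub logging.
    litellm.gcs_pub_sub_use_v1 = True
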
@ -79,6 +79,22 @@ def get_supported_openai_params(  # noqa: PLR0915
    elif custom_llm_provider == "maritalk":
        return litellm.MaritalkConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "openai":
+       if request_type == "transcription":
+           transcription_provider_config = (
+               litellm.ProviderConfigManager.get_provider_audio_transcription_config(
+                   model=model, provider=LlmProviders.OPENAI
+               )
+           )
+           if isinstance(
+               transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
+           ):
+               return transcription_provider_config.get_supported_openai_params(
+                   model=model
+               )
+           else:
+               raise ValueError(
+                   f"Unsupported provider config: {transcription_provider_config} for model: {model}"
+               )
        return litellm.OpenAIConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "azure":
        if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
@ -518,6 +518,16 @@ class Logging(LiteLLMLoggingBaseClass):
            }
            return data

+   def _get_masked_api_base(self, api_base: str) -> str:
+       if "key=" in api_base:
+           # Find the position of "key=" in the string
+           key_index = api_base.find("key=") + 4
+           # Mask the last 5 characters after "key="
+           masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
+       else:
+           masked_api_base = api_base
+       return str(masked_api_base)

    def _pre_call(self, input, api_key, model=None, additional_args={}):
        """
        Common helper function across the sync + async pre-call function
@ -531,6 +541,9 @@ class Logging(LiteLLMLoggingBaseClass):
            model
        ):  # if model name was changes pre-call, overwrite the initial model call name with the new one
            self.model_call_details["model"] = model
+       self.model_call_details["litellm_params"]["api_base"] = (
+           self._get_masked_api_base(additional_args.get("api_base", ""))
+       )

    def pre_call(self, input, api_key, model=None, additional_args={}):  # noqa: PLR0915

@ -714,15 +727,6 @@ class Logging(LiteLLMLoggingBaseClass):
                headers = {}
            data = additional_args.get("complete_input_dict", {})
            api_base = str(additional_args.get("api_base", ""))
-           if "key=" in api_base:
-               # Find the position of "key=" in the string
-               key_index = api_base.find("key=") + 4
-               # Mask the last 5 characters after "key="
-               masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
-           else:
-               masked_api_base = api_base
-           self.model_call_details["litellm_params"]["api_base"] = masked_api_base

            curl_command = self._get_request_curl_command(
                api_base=api_base,
                headers=headers,
@ -737,11 +741,12 @@ class Logging(LiteLLMLoggingBaseClass):
    def _get_request_curl_command(
        self, api_base: str, headers: Optional[dict], additional_args: dict, data: dict
    ) -> str:
+       masked_api_base = self._get_masked_api_base(api_base)
        if headers is None:
            headers = {}
        curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
        curl_command += "curl -X POST \\\n"
-       curl_command += f"{api_base} \\\n"
+       curl_command += f"{masked_api_base} \\\n"
        masked_headers = self._get_masked_headers(headers)
        formatted_headers = " ".join(
            [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
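
A quick sketch (not from the diff) of what the extracted _get_masked_api_base helper produces for a URL that carries the key as a query parameter; the sample key is made up:

    api_base = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key=abcd1234efgh"
    key_index = api_base.find("key=") + 4
    masked = api_base[:key_index] + "*" * 5 + api_base[-4:]
    print(masked)  # ...generateContent?key=*****efgh
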
@ -1,7 +1,7 @@
# What is this?
## Helper utilities for cost_per_token()

-from typing import Optional, Tuple
+from typing import Optional, Tuple, cast

import litellm
from litellm import verbose_logger

@ -143,26 +143,50 @@ def generic_cost_per_token(
    ### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
    prompt_cost = 0.0
    ### PROCESSING COST
-   non_cache_hit_tokens = usage.prompt_tokens
+   text_tokens = usage.prompt_tokens
    cache_hit_tokens = 0
-   if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
-       cache_hit_tokens = usage.prompt_tokens_details.cached_tokens
-       non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens
+   audio_tokens = 0
+   if usage.prompt_tokens_details:
+       cache_hit_tokens = (
+           cast(
+               Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
+           )
+           or 0
+       )
+       text_tokens = (
+           cast(
+               Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
+           )
+           or 0  # default to prompt tokens, if this field is not set
+       )
+       audio_tokens = (
+           cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
+           or 0
+       )
+
+   ## EDGE CASE - text tokens not set inside PromptTokensDetails
+   if text_tokens == 0:
+       text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens

    prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)

-   prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost
+   prompt_cost = float(text_tokens) * prompt_base_cost

    _cache_read_input_token_cost = model_info.get("cache_read_input_token_cost")

+   ### CACHE READ COST
    if (
        _cache_read_input_token_cost is not None
-       and usage.prompt_tokens_details
-       and usage.prompt_tokens_details.cached_tokens
+       and cache_hit_tokens is not None
+       and cache_hit_tokens > 0
    ):
-       prompt_cost += (
-           float(usage.prompt_tokens_details.cached_tokens)
-           * _cache_read_input_token_cost
-       )
+       prompt_cost += float(cache_hit_tokens) * _cache_read_input_token_cost
+
+   ### AUDIO COST
+   audio_token_cost = model_info.get("input_cost_per_audio_token")
+   if audio_token_cost is not None and audio_tokens is not None and audio_tokens > 0:
+       prompt_cost += float(audio_tokens) * audio_token_cost

    ### CACHE WRITING COST
    _cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost")

@ -175,6 +199,37 @@ def generic_cost_per_token(
    completion_base_cost = _get_completion_token_base_cost(
        model_info=model_info, usage=usage
    )
-   completion_cost = usage["completion_tokens"] * completion_base_cost
+   text_tokens = usage.completion_tokens
+   audio_tokens = 0
+   if usage.completion_tokens_details is not None:
+       audio_tokens = (
+           cast(
+               Optional[int],
+               getattr(usage.completion_tokens_details, "audio_tokens", 0),
+           )
+           or 0
+       )
+       text_tokens = (
+           cast(
+               Optional[int],
+               getattr(usage.completion_tokens_details, "text_tokens", None),
+           )
+           or usage.completion_tokens  # default to completion tokens, if this field is not set
+       )
+
+   ## TEXT COST
+   completion_cost = float(text_tokens) * completion_base_cost
+
+   _output_cost_per_audio_token: Optional[float] = model_info.get(
+       "output_cost_per_audio_token"
+   )
+
+   ## AUDIO COST
+   if (
+       _output_cost_per_audio_token is not None
+       and audio_tokens is not None
+       and audio_tokens > 0
+   ):
+       completion_cost += float(audio_tokens) * _output_cost_per_audio_token

    return prompt_cost, completion_cost
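
A worked sketch of the prompt-side arithmetic above, using hypothetical per-token prices (not taken from the diff): text, cached, and audio prompt tokens are each billed at their own rate.

    # Hypothetical model_info values, for illustration only.
    input_cost_per_token = 1e-6
    cache_read_input_token_cost = 2.5e-7
    input_cost_per_audio_token = 4e-6

    prompt_tokens = 1000      # total prompt tokens
    cache_hit_tokens = 400    # prompt_tokens_details.cached_tokens
    audio_tokens = 100        # prompt_tokens_details.audio_tokens
    # Edge-case fallback from the code above when text_tokens is not reported:
    text_tokens = prompt_tokens - cache_hit_tokens - audio_tokens  # 500

    prompt_cost = (
        text_tokens * input_cost_per_token
        + cache_hit_tokens * cache_read_input_token_cost
        + audio_tokens * input_cost_per_audio_token
    )
    print(prompt_cost)  # 0.001
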
@ -138,13 +138,22 @@ class ModelParamHelper:
                TranscriptionCreateParamsNonStreaming,
                TranscriptionCreateParamsStreaming,
            )
-           non_streaming_kwargs = set(getattr(TranscriptionCreateParamsNonStreaming, "__annotations__", {}).keys())
-           streaming_kwargs = set(getattr(TranscriptionCreateParamsStreaming, "__annotations__", {}).keys())
+           non_streaming_kwargs = set(
+               getattr(
+                   TranscriptionCreateParamsNonStreaming, "__annotations__", {}
+               ).keys()
+           )
+           streaming_kwargs = set(
+               getattr(
+                   TranscriptionCreateParamsStreaming, "__annotations__", {}
+               ).keys()
+           )

            all_transcription_kwargs = non_streaming_kwargs.union(streaming_kwargs)
            return all_transcription_kwargs
        except Exception as e:
-           verbose_logger.warning("Error getting transcription kwargs %s", str(e))
+           verbose_logger.debug("Error getting transcription kwargs %s", str(e))
            return set()

    @staticmethod
@ -2,11 +2,14 @@
This file contains common utils for anthropic calls.
"""

-from typing import Optional, Union
+from typing import List, Optional, Union

import httpx

+import litellm
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.main import get_secret_str


class AnthropicError(BaseLLMException):
@ -19,6 +22,54 @@ class AnthropicError(BaseLLMException):
        super().__init__(status_code=status_code, message=message, headers=headers)


+class AnthropicModelInfo(BaseLLMModelInfo):
+    @staticmethod
+    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
+        return (
+            api_base
+            or get_secret_str("ANTHROPIC_API_BASE")
+            or "https://api.anthropic.com"
+        )
+
+    @staticmethod
+    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+        return api_key or get_secret_str("ANTHROPIC_API_KEY")
+
+    @staticmethod
+    def get_base_model(model: Optional[str] = None) -> Optional[str]:
+        return model.replace("anthropic/", "") if model else None
+
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
+        api_base = AnthropicModelInfo.get_api_base(api_base)
+        api_key = AnthropicModelInfo.get_api_key(api_key)
+        if api_base is None or api_key is None:
+            raise ValueError(
+                "ANTHROPIC_API_BASE or ANTHROPIC_API_KEY is not set. Please set the environment variable, to query Anthropic's `/models` endpoint."
+            )
+        response = litellm.module_level_client.get(
+            url=f"{api_base}/v1/models",
+            headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"},
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError:
+            raise Exception(
+                f"Failed to fetch models from Anthropic. Status code: {response.status_code}, Response: {response.text}"
+            )
+
+        models = response.json()["data"]
+
+        litellm_model_names = []
+        for model in models:
+            stripped_model_name = model["id"]
+            litellm_model_name = "anthropic/" + stripped_model_name
+            litellm_model_names.append(litellm_model_name)
+        return litellm_model_names
+
+
def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
    openai_headers = {}
    if "anthropic-ratelimit-requests-limit" in headers:
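
A short usage sketch for the new AnthropicModelInfo helper, assuming ANTHROPIC_API_KEY is set in the environment and that the class lives in the anthropic common_utils module shown above; the printed model ids are illustrative:

    from litellm.llms.anthropic.common_utils import AnthropicModelInfo  # assumed module path

    anthropic_info = AnthropicModelInfo()
    models = anthropic_info.get_models()  # GET https://api.anthropic.com/v1/models
    print(models)  # e.g. ["anthropic/claude-3-5-sonnet-20241022", ...]
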
@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional, Union

import httpx

@ -8,7 +8,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
-from litellm.types.utils import ModelResponse
+from litellm.types.utils import FileTypes, ModelResponse

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

@ -42,6 +42,18 @@ class BaseAudioTranscriptionConfig(BaseConfig, ABC):
        """
        return api_base or ""

+    @abstractmethod
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        raise NotImplementedError(
+            "AudioTranscriptionConfig needs a request transformation for audio transcription models"
+        )
+
    def transform_request(
        self,
        model: str,
@ -19,11 +19,19 @@ class BaseLLMModelInfo(ABC):
        self,
        model: str,
    ) -> Optional[ProviderSpecificModelInfo]:
+        """
+        Default values all models of this provider support.
+        """
        return None

    @abstractmethod
-    def get_models(self) -> List[str]:
-        pass
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
+        """
+        Returns a list of models supported by this provider.
+        """
+        return []

    @staticmethod
    @abstractmethod
@ -1274,13 +1274,6 @@ class AWSEventStreamDecoder:
    def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
        try:
            verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
-            chunk_data["usage"] = {
-                "inputTokens": 3,
-                "outputTokens": 392,
-                "totalTokens": 2191,
-                "cacheReadInputTokens": 1796,
-                "cacheWriteInputTokens": 0,
-            }
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            finish_reason = ""
@ -5,7 +5,8 @@ from openai.types.image import Image

from litellm.types.llms.bedrock import (
    AmazonNovaCanvasTextToImageRequest, AmazonNovaCanvasTextToImageResponse,
-    AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase,
+    AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase, AmazonNovaCanvasColorGuidedGenerationParams,
+    AmazonNovaCanvasColorGuidedRequest,
)
from litellm.types.utils import ImageResponse

@ -69,6 +70,13 @@ class AmazonNovaCanvasConfig:
            text_to_image_params = AmazonNovaCanvasTextToImageParams(**text_to_image_params)
            return AmazonNovaCanvasTextToImageRequest(textToImageParams=text_to_image_params, taskType=task_type,
                                                      imageGenerationConfig=image_generation_config)
+        if task_type == "COLOR_GUIDED_GENERATION":
+            color_guided_generation_params = image_generation_config.pop("colorGuidedGenerationParams", {})
+            color_guided_generation_params = {"text": text, **color_guided_generation_params}
+            color_guided_generation_params = AmazonNovaCanvasColorGuidedGenerationParams(**color_guided_generation_params)
+            return AmazonNovaCanvasColorGuidedRequest(taskType=task_type,
+                                                      colorGuidedGenerationParams=color_guided_generation_params,
+                                                      imageGenerationConfig=image_generation_config)
        raise NotImplementedError(f"Task type {task_type} is not supported")

    @classmethod
@ -1,4 +1,3 @@
-import io
import json
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union

@ -8,6 +7,9 @@ import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig

@ -852,54 +854,12 @@ class BaseLLMHTTPHandler:
            request_data=request_data,
        )

-    def handle_audio_file(self, audio_file: FileTypes) -> bytes:
-        """
-        Processes the audio file input based on its type and returns the binary data.
-
-        Args:
-            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
-
-        Returns:
-            The binary data of the audio file.
-        """
-        binary_data: bytes  # Explicitly declare the type
-
-        # Handle the audio file based on type
-        if isinstance(audio_file, str):
-            # If it's a file path
-            with open(audio_file, "rb") as f:
-                binary_data = f.read()  # `f.read()` always returns `bytes`
-        elif isinstance(audio_file, tuple):
-            # Handle tuple case
-            _, file_content = audio_file[:2]
-            if isinstance(file_content, str):
-                with open(file_content, "rb") as f:
-                    binary_data = f.read()  # `f.read()` always returns `bytes`
-            elif isinstance(file_content, bytes):
-                binary_data = file_content
-            else:
-                raise TypeError(
-                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
-                )
-        elif isinstance(audio_file, bytes):
-            # Assume it's already binary data
-            binary_data = audio_file
-        elif isinstance(audio_file, io.BufferedReader) or isinstance(
-            audio_file, io.BytesIO
-        ):
-            # Handle file-like objects
-            binary_data = audio_file.read()
-
-        else:
-            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
-
-        return binary_data
-
    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
+        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
@ -910,11 +870,8 @@ class BaseLLMHTTPHandler:
        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
        atranscription: bool = False,
        headers: dict = {},
-        litellm_params: dict = {},
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
-        provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
-            model=model, provider=litellm.LlmProviders(custom_llm_provider)
-        )
        if provider_config is None:
            raise ValueError(
                f"No provider config found for model: {model} and provider: {custom_llm_provider}"
@ -938,7 +895,18 @@ class BaseLLMHTTPHandler:
        )

        # Handle the audio file based on type
-        binary_data = self.handle_audio_file(audio_file)
+        data = provider_config.transform_audio_transcription_request(
+            model=model,
+            audio_file=audio_file,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+        )
+        binary_data: Optional[bytes] = None
+        json_data: Optional[dict] = None
+        if isinstance(data, bytes):
+            binary_data = data
+        else:
+            json_data = data

        try:
            # Make the POST request
@ -946,6 +914,7 @@ class BaseLLMHTTPHandler:
                url=complete_url,
                headers=headers,
                content=binary_data,
+                json=json_data,
                timeout=timeout,
            )
        except Exception as e:
@ -2,6 +2,7 @@
Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
"""

+import io
from typing import List, Optional, Union

from httpx import Headers, Response

@ -12,7 +13,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
-from litellm.types.utils import TranscriptionResponse
+from litellm.types.utils import FileTypes, TranscriptionResponse

from ...base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,

@ -47,6 +48,55 @@ class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
            message=error_message, status_code=status_code, headers=headers
        )

+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        """
+        Processes the audio file input based on its type and returns the binary data.
+
+        Args:
+            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
+
+        Returns:
+            The binary data of the audio file.
+        """
+        binary_data: bytes  # Explicitly declare the type
+
+        # Handle the audio file based on type
+        if isinstance(audio_file, str):
+            # If it's a file path
+            with open(audio_file, "rb") as f:
+                binary_data = f.read()  # `f.read()` always returns `bytes`
+        elif isinstance(audio_file, tuple):
+            # Handle tuple case
+            _, file_content = audio_file[:2]
+            if isinstance(file_content, str):
+                with open(file_content, "rb") as f:
+                    binary_data = f.read()  # `f.read()` always returns `bytes`
+            elif isinstance(file_content, bytes):
+                binary_data = file_content
+            else:
+                raise TypeError(
+                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
+                )
+        elif isinstance(audio_file, bytes):
+            # Assume it's already binary data
+            binary_data = audio_file
+        elif isinstance(audio_file, io.BufferedReader) or isinstance(
+            audio_file, io.BytesIO
+        ):
+            # Handle file-like objects
+            binary_data = audio_file.read()
+
+        else:
+            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
+
+        return binary_data
+
    def transform_audio_transcription_response(
        self,
        model: str,
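
A small sketch of what the Deepgram transformation above returns for a couple of audio_file shapes; the model name and file name are placeholders:

    cfg = DeepgramAudioTranscriptionConfig()
    raw = b"RIFF....WAVE"  # already-binary audio content

    # Raw bytes are passed through unchanged.
    assert cfg.transform_audio_transcription_request(
        model="deepgram/nova-2", audio_file=raw, optional_params={}, litellm_params={}
    ) == raw

    # A (filename, bytes) tuple is unpacked and its bytes are returned.
    assert cfg.transform_audio_transcription_request(
        model="deepgram/nova-2",
        audio_file=("speech.wav", raw),
        optional_params={},
        litellm_params={},
    ) == raw
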
@ -2,27 +2,16 @@ from typing import List

from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams

-from ...base_llm.audio_transcription.transformation import BaseAudioTranscriptionConfig
+from ...openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
from ..common_utils import FireworksAIMixin


class FireworksAIAudioTranscriptionConfig(
-    FireworksAIMixin, BaseAudioTranscriptionConfig
+    FireworksAIMixin, OpenAIWhisperAudioTranscriptionConfig
):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        return ["language", "prompt", "response_format", "timestamp_granularities"]
-
-    def map_openai_params(
-        self,
-        non_default_params: dict,
-        optional_params: dict,
-        model: str,
-        drop_params: bool,
-    ) -> dict:
-        supported_params = self.get_supported_openai_params(model)
-        for k, v in non_default_params.items():
-            if k in supported_params:
-                optional_params[k] = v
-        return optional_params
litellm/llms/gemini/common_utils.py (new file, 52 lines)
@ -0,0 +1,52 @@
from typing import List, Optional

import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.secret_managers.main import get_secret_str


class GeminiModelInfo(BaseLLMModelInfo):
    @staticmethod
    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
        return (
            api_base
            or get_secret_str("GEMINI_API_BASE")
            or "https://generativelanguage.googleapis.com/v1beta"
        )

    @staticmethod
    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
        return api_key or (get_secret_str("GEMINI_API_KEY"))

    @staticmethod
    def get_base_model(model: str) -> Optional[str]:
        return model.replace("gemini/", "")

    def get_models(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> List[str]:

        api_base = GeminiModelInfo.get_api_base(api_base)
        api_key = GeminiModelInfo.get_api_key(api_key)
        if api_base is None or api_key is None:
            raise ValueError(
                "GEMINI_API_BASE or GEMINI_API_KEY is not set. Please set the environment variable, to query Gemini's `/models` endpoint."
            )

        response = litellm.module_level_client.get(
            url=f"{api_base}/models?key={api_key}",
        )

        if response.status_code != 200:
            raise ValueError(
                f"Failed to fetch models from Gemini. Status code: {response.status_code}, Response: {response.json()}"
            )

        models = response.json()["models"]

        litellm_model_names = []
        for model in models:
            stripped_model_name = model["name"].strip("models/")
            litellm_model_name = "gemini/" + stripped_model_name
            litellm_model_names.append(litellm_model_name)
        return litellm_model_names
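
A usage sketch for the new GeminiModelInfo helper, assuming GEMINI_API_KEY is set in the environment; the printed model names are illustrative:

    from litellm.llms.gemini.common_utils import GeminiModelInfo

    gemini_info = GeminiModelInfo()
    models = gemini_info.get_models()  # GET {api_base}/models?key=...
    print(models)  # e.g. ["gemini/gemini-1.5-pro", "gemini/gemini-1.5-flash", ...]
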
@ -80,6 +80,7 @@ class MistralConfig(OpenAIGPTConfig):
            "temperature",
            "top_p",
            "max_tokens",
+            "max_completion_tokens",
            "tools",
            "tool_choice",
            "seed",
@ -105,6 +106,10 @@ class MistralConfig(OpenAIGPTConfig):
        for param, value in non_default_params.items():
            if param == "max_tokens":
                optional_params["max_tokens"] = value
+            if (
+                param == "max_completion_tokens"
+            ):  # max_completion_tokens should take priority
+                optional_params["max_tokens"] = value
            if param == "tools":
                optional_params["tools"] = value
            if param == "stream" and value is True:
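
A quick sketch of the mapping above: `max_completion_tokens` is translated to Mistral's `max_tokens` and wins over a plain `max_tokens` when both are supplied. This assumes map_openai_params keeps the usual (non_default_params, optional_params, model, drop_params) signature:

    import litellm

    cfg = litellm.MistralConfig()
    optional_params = cfg.map_openai_params(
        non_default_params={"max_tokens": 256, "max_completion_tokens": 512},
        optional_params={},
        model="mistral/mistral-large-latest",  # illustrative model name
        drop_params=False,
    )
    print(optional_params)  # {"max_tokens": 512}
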
@ -6,6 +6,7 @@ Helper util for handling openai-specific cost calculation
from typing import Literal, Optional, Tuple

from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import CallTypes, Usage
from litellm.utils import get_model_info

@ -28,52 +29,53 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## CALCULATE INPUT COST
    return generic_cost_per_token(
        model=model, usage=usage, custom_llm_provider="openai"
    )
    # ### Non-cached text tokens
    # non_cached_text_tokens = usage.prompt_tokens
    # cached_tokens: Optional[int] = None
    # if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
    #     cached_tokens = usage.prompt_tokens_details.cached_tokens
    #     non_cached_text_tokens = non_cached_text_tokens - cached_tokens
    # prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
    # ## Prompt Caching cost calculation
    # if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
    #     # Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
    #     prompt_cost += cached_tokens * (
    #         model_info.get("cache_read_input_token_cost", 0) or 0
    #     )

    # _audio_tokens: Optional[int] = (
    #     usage.prompt_tokens_details.audio_tokens
    #     if usage.prompt_tokens_details is not None
    #     else None
    # )
    # _audio_cost_per_token: Optional[float] = model_info.get(
    #     "input_cost_per_audio_token"
    # )
    # if _audio_tokens is not None and _audio_cost_per_token is not None:
    #     audio_cost: float = _audio_tokens * _audio_cost_per_token
    #     prompt_cost += audio_cost

    # ## CALCULATE OUTPUT COST
    # completion_cost: float = (
    #     usage["completion_tokens"] * model_info["output_cost_per_token"]
    # )
    # _output_cost_per_audio_token: Optional[float] = model_info.get(
    #     "output_cost_per_audio_token"
    # )
    # _output_audio_tokens: Optional[int] = (
    #     usage.completion_tokens_details.audio_tokens
    #     if usage.completion_tokens_details is not None
    #     else None
    # )
    # if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
    #     audio_cost = _output_audio_tokens * _output_cost_per_audio_token
    #     completion_cost += audio_cost

    # return prompt_cost, completion_cost


def cost_per_second(
litellm/llms/openai/transcriptions/gpt_transformation.py (new file, 34 lines)
@ -0,0 +1,34 @@
from typing import List

from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
from litellm.types.utils import FileTypes

from .whisper_transformation import OpenAIWhisperAudioTranscriptionConfig


class OpenAIGPTAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """
        Get the supported OpenAI params for the `gpt-4o-transcribe` models
        """
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "include",
        ]

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform the audio transcription request
        """
        return {"model": model, "file": audio_file, **optional_params}
@ -7,6 +7,9 @@ from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
from litellm.types.utils import FileTypes
from litellm.utils import (
    TranscriptionResponse,
@ -75,6 +78,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
+        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
@ -83,16 +87,24 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
        api_base: Optional[str],
        client=None,
        atranscription: bool = False,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
-        data = {"model": model, "file": audio_file, **optional_params}
-
-        if "response_format" not in data or (
-            data["response_format"] == "text" or data["response_format"] == "json"
-        ):
-            data["response_format"] = (
-                "verbose_json"  # ensures 'duration' is received - used for cost calculation
-            )
+        """
+        Handle audio transcription request
+        """
+        if provider_config is not None:
+            data = provider_config.transform_audio_transcription_request(
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+            )
+            if isinstance(data, bytes):
+                raise ValueError("OpenAI transformation route requires a dict")
+        else:
+            data = {"model": model, "file": audio_file, **optional_params}

        if atranscription is True:
            return self.async_audio_transcriptions(  # type: ignore
                audio_file=audio_file,
litellm/llms/openai/transcriptions/whisper_transformation.py (new file, 97 lines)
@@ -0,0 +1,97 @@
from typing import List, Optional, Union

from httpx import Headers

from litellm.llms.base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes

from ..common_utils import OpenAIError


class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """
        Get the supported OpenAI params for the `whisper-1` models
        """
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map the OpenAI params to the Whisper params
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        api_key = api_key or get_secret_str("OPENAI_API_KEY")

        auth_header = {
            "Authorization": f"Bearer {api_key}",
        }

        headers.update(auth_header)
        return headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform the audio transcription request
        """

        data = {"model": model, "file": audio_file, **optional_params}

        if "response_format" not in data or (
            data["response_format"] == "text" or data["response_format"] == "json"
        ):
            data["response_format"] = (
                "verbose_json"  # ensures 'duration' is received - used for cost calculation
            )

        return data

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        return OpenAIError(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )
@@ -11,7 +11,9 @@ class TopazException(BaseLLMException):


 class TopazModelInfo(BaseLLMModelInfo):
-    def get_models(self) -> List[str]:
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
         return [
             "topaz/Standard V2",
             "topaz/Low Resolution V2",
@@ -3,6 +3,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union

 import httpx

+import litellm
 from litellm import supports_response_schema, supports_system_messages, verbose_logger
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 from litellm.types.llms.vertex_ai import PartType

@@ -28,6 +29,10 @@ def get_supports_system_message(
         supports_system_message = supports_system_messages(
             model=model, custom_llm_provider=_custom_llm_provider
         )
+
+        # Vertex Models called in the `/gemini` request/response format also support system messages
+        if litellm.VertexGeminiConfig._is_model_gemini_spec_model(model):
+            supports_system_message = True
     except Exception as e:
         verbose_logger.warning(
             "Unable to identify if system message supported. Defaulting to 'False'. Received error message - {}\nAdd it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(

@@ -55,7 +60,9 @@ def get_supports_response_schema(

 from typing import Literal, Optional

-all_gemini_url_modes = Literal["chat", "embedding", "batch_embedding"]
+all_gemini_url_modes = Literal[
+    "chat", "embedding", "batch_embedding", "image_generation"
+]


 def _get_vertex_url(

@@ -68,6 +75,8 @@ def _get_vertex_url(
 ) -> Tuple[str, str]:
     url: Optional[str] = None
     endpoint: Optional[str] = None
+
+    model = litellm.VertexGeminiConfig.get_model_for_vertex_ai_url(model=model)
     if mode == "chat":
         ### SET RUNTIME ENDPOINT ###
         endpoint = "generateContent"

@@ -91,7 +100,11 @@ def _get_vertex_url(
         if model.isdigit():
             # https://us-central1-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/us-central1/endpoints/$ENDPOINT_ID:predict
             url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
+    elif mode == "image_generation":
+        endpoint = "predict"
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+        if model.isdigit():
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
     if not url or not endpoint:
         raise ValueError(f"Unable to get vertex url/endpoint for mode: {mode}")
     return url, endpoint

@@ -127,6 +140,10 @@ def _get_gemini_url(
         url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
             _gemini_model_name, endpoint, gemini_api_key
         )
+    elif mode == "image_generation":
+        raise ValueError(
+            "LiteLLM's `gemini/` route does not support image generation yet. Let us know if you need this feature by opening an issue at https://github.com/BerriAI/litellm/issues"
+        )

     return url, endpoint
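Example (not part of the diff) — the URL shape the new "image_generation" branch of _get_vertex_url builds for a publisher model; all values below are placeholders:

# Illustrative only: reproduces the f-string from the hunk above with placeholder values.
vertex_location = "us-central1"
vertex_project = "my-project"      # hypothetical project id
model = "imagegeneration@006"      # hypothetical Imagen model name
endpoint = "predict"

url = (
    f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/"
    f"{vertex_project}/locations/{vertex_location}/publishers/google/models/"
    f"{model}:{endpoint}"
)
print(url)
# https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/google/models/imagegeneration@006:predict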
@@ -4,7 +4,11 @@ from typing import Literal, Optional, Tuple, Union

 import litellm
 from litellm import verbose_logger
-from litellm.litellm_core_utils.llm_cost_calc.utils import _is_above_128k
+from litellm.litellm_core_utils.llm_cost_calc.utils import (
+    _is_above_128k,
+    generic_cost_per_token,
+)
+from litellm.types.utils import ModelInfo, Usage

 """
 Gemini pricing covers:

@@ -20,7 +24,7 @@ Vertex AI -> character based pricing
 Google AI Studio -> token based pricing
 """

-models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
+models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro", "gemini-2"]


 def cost_router(

@@ -46,14 +50,15 @@ def cost_router(
         call_type == "embedding" or call_type == "aembedding"
     ):
         return "cost_per_token"
+    elif custom_llm_provider == "vertex_ai" and ("gemini-2" in model):
+        return "cost_per_token"
     return "cost_per_character"


 def cost_per_character(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
+    usage: Usage,
     prompt_characters: Optional[float] = None,
     completion_characters: Optional[float] = None,
 ) -> Tuple[float, float]:

@@ -86,8 +91,7 @@ def cost_per_character(
             prompt_cost, _ = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )
         else:
             try:

@@ -124,8 +128,7 @@ def cost_per_character(
             prompt_cost, _ = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )

     ## CALCULATE OUTPUT COST

@@ -133,10 +136,10 @@ def cost_per_character(
         _, completion_cost = cost_per_token(
             model=model,
             custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
+            usage=usage,
         )
     else:
+        completion_tokens = usage.completion_tokens
         try:
             if (
                 _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char

@@ -172,18 +175,54 @@ def cost_per_character(
             _, completion_cost = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )

     return prompt_cost, completion_cost


+def _handle_128k_pricing(
+    model_info: ModelInfo,
+    usage: Usage,
+) -> Tuple[float, float]:
+    ## CALCULATE INPUT COST
+    input_cost_per_token_above_128k_tokens = model_info.get(
+        "input_cost_per_token_above_128k_tokens"
+    )
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
+
+    prompt_tokens = usage.prompt_tokens
+    completion_tokens = usage.completion_tokens
+
+    if (
+        _is_above_128k(tokens=prompt_tokens)
+        and input_cost_per_token_above_128k_tokens is not None
+    ):
+        prompt_cost = prompt_tokens * input_cost_per_token_above_128k_tokens
+    else:
+        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
+    if (
+        _is_above_128k(tokens=completion_tokens)
+        and output_cost_per_token_above_128k_tokens is not None
+    ):
+        completion_cost = completion_tokens * output_cost_per_token_above_128k_tokens
+    else:
+        completion_cost = completion_tokens * model_info["output_cost_per_token"]
+
+    return prompt_cost, completion_cost
+
+
 def cost_per_token(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
+    usage: Usage,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.

@@ -205,38 +244,24 @@ def cost_per_token(
         model=model, custom_llm_provider=custom_llm_provider
     )

-    ## CALCULATE INPUT COST
+    ## HANDLE 128k+ PRICING
+    input_cost_per_token_above_128k_tokens = model_info.get(
+        "input_cost_per_token_above_128k_tokens"
+    )
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
     if (
-        _is_above_128k(tokens=prompt_tokens)
-        and model not in models_without_dynamic_pricing
+        input_cost_per_token_above_128k_tokens is not None
+        or output_cost_per_token_above_128k_tokens is not None
     ):
-        assert (
-            "input_cost_per_token_above_128k_tokens" in model_info
-            and model_info["input_cost_per_token_above_128k_tokens"] is not None
-        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
-            model, model_info
-        )
-        prompt_cost = (
-            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
-        )
-    else:
-        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]
+        return _handle_128k_pricing(
+            model_info=model_info,
+            usage=usage,
+        )

-    ## CALCULATE OUTPUT COST
-    if (
-        _is_above_128k(tokens=completion_tokens)
-        and model not in models_without_dynamic_pricing
-    ):
-        assert (
-            "output_cost_per_token_above_128k_tokens" in model_info
-            and model_info["output_cost_per_token_above_128k_tokens"] is not None
-        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
-            model, model_info
-        )
-        completion_cost = (
-            completion_tokens * model_info["output_cost_per_token_above_128k_tokens"]
-        )
-    else:
-        completion_cost = completion_tokens * model_info["output_cost_per_token"]
-
-    return prompt_cost, completion_cost
+    return generic_cost_per_token(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        usage=usage,
+    )
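Example (not part of the diff) — a worked instance of the _handle_128k_pricing arithmetic introduced above; the per-token rates are made up, not real Gemini prices:

# Worked example: with 200k prompt tokens the above-128k input rate applies,
# while 1k completion tokens stay on the base output rate.
model_info = {
    "input_cost_per_token": 1e-6,
    "output_cost_per_token": 2e-6,
    "input_cost_per_token_above_128k_tokens": 2e-6,
    "output_cost_per_token_above_128k_tokens": 4e-6,
}
prompt_tokens, completion_tokens = 200_000, 1_000

prompt_cost = prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
completion_cost = completion_tokens * model_info["output_cost_per_token"]
print(prompt_cost, completion_cost)  # 0.4 0.002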
@@ -207,6 +207,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             "extra_headers",
             "seed",
             "logprobs",
+            "top_logprobs",  # Added this to list of supported openAI params
         ]

     def map_tool_choice_values(

@@ -365,6 +366,8 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 optional_params["presence_penalty"] = value
             if param == "logprobs":
                 optional_params["responseLogprobs"] = value
+            if param == "top_logprobs":
+                optional_params["logprobs"] = value
             if (param == "tools" or param == "functions") and isinstance(value, list):
                 optional_params["tools"] = self._map_function(value=value)
                 optional_params["litellm_param_is_function_call"] = (

@@ -416,6 +419,49 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             "europe-west9",
         ]

+    @staticmethod
+    def get_model_for_vertex_ai_url(model: str) -> str:
+        """
+        Returns the model name to use in the request to Vertex AI
+
+        Handles 2 cases:
+        1. User passed `model="vertex_ai/gemini/ft-uuid"`, we need to return `ft-uuid` for the request to Vertex AI
+        2. User passed `model="vertex_ai/gemini-2.0-flash-001"`, we need to return `gemini-2.0-flash-001` for the request to Vertex AI
+
+        Args:
+            model (str): The model name to use in the request to Vertex AI
+
+        Returns:
+            str: The model name to use in the request to Vertex AI
+        """
+        if VertexGeminiConfig._is_model_gemini_spec_model(model):
+            return VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
+        return model
+
+    @staticmethod
+    def _is_model_gemini_spec_model(model: Optional[str]) -> bool:
+        """
+        Returns true if user is trying to call custom model in `/gemini` request/response format
+        """
+        if model is None:
+            return False
+        if "gemini/" in model:
+            return True
+        return False
+
+    @staticmethod
+    def _get_model_name_from_gemini_spec_model(model: str) -> str:
+        """
+        Returns the model name if model="vertex_ai/gemini/<unique_id>"
+
+        Example:
+        - model = "gemini/1234567890"
+        - returns "1234567890"
+        """
+        if "gemini/" in model:
+            return model.split("/")[-1]
+        return model
+
     def get_flagged_finish_reasons(self) -> Dict[str, str]:
         """
         Return Dictionary of finish reasons which indicate response was flagged

@@ -597,15 +643,24 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         completion_response: GenerateContentResponseBody,
     ) -> Usage:
         cached_tokens: Optional[int] = None
+        audio_tokens: Optional[int] = None
+        text_tokens: Optional[int] = None
         prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
         if "cachedContentTokenCount" in completion_response["usageMetadata"]:
             cached_tokens = completion_response["usageMetadata"][
                 "cachedContentTokenCount"
             ]
+        if "promptTokensDetails" in completion_response["usageMetadata"]:
+            for detail in completion_response["usageMetadata"]["promptTokensDetails"]:
+                if detail["modality"] == "AUDIO":
+                    audio_tokens = detail["tokenCount"]
+                elif detail["modality"] == "TEXT":
+                    text_tokens = detail["tokenCount"]

-        if cached_tokens is not None:
         prompt_tokens_details = PromptTokensDetailsWrapper(
             cached_tokens=cached_tokens,
+            audio_tokens=audio_tokens,
+            text_tokens=text_tokens,
         )
         ## GET USAGE ##
         usage = Usage(

@@ -745,6 +800,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             model_response.choices.append(choice)

         usage = self._calculate_usage(completion_response=completion_response)
+
         setattr(model_response, "usage", usage)

         ## ADD GROUNDING METADATA ##
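Example (not part of the diff) — expected behavior of the new static helpers, following their docstrings; uses litellm.VertexGeminiConfig, the same accessor the common-utils hunk above relies on:

# Sketch only: checks the "gemini/<id>" spec-model handling.
import litellm

assert litellm.VertexGeminiConfig._is_model_gemini_spec_model("gemini/ft-uuid") is True
assert litellm.VertexGeminiConfig.get_model_for_vertex_ai_url("gemini/ft-uuid") == "ft-uuid"
assert (
    litellm.VertexGeminiConfig.get_model_for_vertex_ai_url("gemini-2.0-flash-001")
    == "gemini-2.0-flash-001"
)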
@@ -43,22 +43,23 @@ class VertexImageGeneration(VertexLLM):
     def image_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[Any] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
         aimg_generation=False,
+        extra_headers: Optional[dict] = None,
     ) -> ImageResponse:
         if aimg_generation is True:
             return self.aimage_generation(  # type: ignore
                 prompt=prompt,
+                api_base=api_base,
                 vertex_project=vertex_project,
                 vertex_location=vertex_location,
                 vertex_credentials=vertex_credentials,

@@ -83,13 +84,27 @@ class VertexImageGeneration(VertexLLM):
         else:
             sync_handler = client  # type: ignore

-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+        # url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"

+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params

@@ -99,31 +114,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }

-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
-
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )

         response = sync_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )

@@ -138,17 +143,17 @@ class VertexImageGeneration(VertexLLM):
     async def aimage_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[AsyncHTTPHandler] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
+        extra_headers: Optional[dict] = None,
     ):
         response = None
         if client is None:

@@ -169,7 +174,6 @@ class VertexImageGeneration(VertexLLM):

         # make POST request to
         # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"

         """
         Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218

@@ -188,11 +192,25 @@ class VertexImageGeneration(VertexLLM):
         } \
         "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
         """
+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params

@@ -202,22 +220,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }

-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)

         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )

         response = await self.async_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )
@@ -226,7 +226,15 @@ class VertexMultimodalEmbedding(VertexLLM):
             else:
                 return Instance(image=InstanceImage(gcsUri=input_element))
         elif is_base64_encoded(s=input_element):
-            return Instance(image=InstanceImage(bytesBase64Encoded=input_element))
+            return Instance(
+                image=InstanceImage(
+                    bytesBase64Encoded=(
+                        input_element.split(",")[1]
+                        if "," in input_element
+                        else input_element
+                    )
+                )
+            )
         else:
             return Instance(text=input_element)
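Example (not part of the diff) — what the new branch does with a data-URI style input; the input string is a placeholder:

# Illustrative only: the prefix before the comma is stripped so only the raw
# base64 payload is sent to Vertex multimodal embeddings.
input_element = "data:image/png;base64,iVBORw0KGgoAAA"  # hypothetical input
payload = input_element.split(",")[1] if "," in input_element else input_element
print(payload)  # iVBORw0KGgoAAA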
@@ -111,7 +111,7 @@ class VertexEmbedding(VertexBase):
         )

         try:
-            response = client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
+            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
litellm/llms/xai/common_utils.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
|
||||||
|
from litellm.secret_managers.main import get_secret_str
|
||||||
|
|
||||||
|
|
||||||
|
class XAIModelInfo(BaseLLMModelInfo):
|
||||||
|
@staticmethod
|
||||||
|
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
|
||||||
|
return api_base or get_secret_str("XAI_API_BASE") or "https://api.x.ai"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
|
||||||
|
return api_key or get_secret_str("XAI_API_KEY")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_base_model(model: str) -> Optional[str]:
|
||||||
|
return model.replace("xai/", "")
|
||||||
|
|
||||||
|
def get_models(
|
||||||
|
self, api_key: Optional[str] = None, api_base: Optional[str] = None
|
||||||
|
) -> list[str]:
|
||||||
|
api_base = self.get_api_base(api_base)
|
||||||
|
api_key = self.get_api_key(api_key)
|
||||||
|
if api_base is None or api_key is None:
|
||||||
|
raise ValueError(
|
||||||
|
"XAI_API_BASE or XAI_API_KEY is not set. Please set the environment variable, to query XAI's `/models` endpoint."
|
||||||
|
)
|
||||||
|
response = litellm.module_level_client.get(
|
||||||
|
url=f"{api_base}/v1/models",
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError:
|
||||||
|
raise Exception(
|
||||||
|
f"Failed to fetch models from XAI. Status code: {response.status_code}, Response: {response.text}"
|
||||||
|
)
|
||||||
|
|
||||||
|
models = response.json()["data"]
|
||||||
|
|
||||||
|
litellm_model_names = []
|
||||||
|
for model in models:
|
||||||
|
stripped_model_name = model["id"]
|
||||||
|
litellm_model_name = "xai/" + stripped_model_name
|
||||||
|
litellm_model_names.append(litellm_model_name)
|
||||||
|
return litellm_model_names
|
|
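Example (not part of the diff) — listing xAI models through the new helper; this needs a real XAI_API_KEY and network access, and the returned names shown are illustrative:

# Sketch only: the key below is a placeholder, and the printed list depends
# on what xAI's /v1/models endpoint actually returns.
import os

from litellm.llms.xai.common_utils import XAIModelInfo

os.environ.setdefault("XAI_API_KEY", "xai-placeholder-key")
model_info = XAIModelInfo()
print(model_info.get_models())
# e.g. ["xai/grok-2-latest", ...]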
@@ -2350,6 +2350,8 @@ def completion(  # type: ignore # noqa: PLR0915
             or litellm.api_key
         )

+        api_base = api_base or litellm.api_base or get_secret("GEMINI_API_BASE")
+
         new_params = deepcopy(optional_params)
         response = vertex_chat_completion.completion(  # type: ignore
             model=model,

@@ -2392,6 +2394,8 @@ def completion(  # type: ignore # noqa: PLR0915
             or get_secret("VERTEXAI_CREDENTIALS")
         )

+        api_base = api_base or litellm.api_base or get_secret("VERTEXAI_API_BASE")
+
         new_params = deepcopy(optional_params)
         if (
             model.startswith("meta/")

@@ -3657,6 +3661,8 @@ def embedding(  # noqa: PLR0915
             api_key or get_secret_str("GEMINI_API_KEY") or litellm.api_key
         )

+        api_base = api_base or litellm.api_base or get_secret_str("GEMINI_API_BASE")
+
         response = google_batch_embeddings.batch_embeddings(  # type: ignore
             model=model,
             input=input,

@@ -3671,6 +3677,8 @@ def embedding(  # noqa: PLR0915
             print_verbose=print_verbose,
             custom_llm_provider="gemini",
             api_key=gemini_api_key,
+            api_base=api_base,
+            client=client,
         )

     elif custom_llm_provider == "vertex_ai":

@@ -3695,6 +3703,13 @@ def embedding(  # noqa: PLR0915
             or get_secret_str("VERTEX_CREDENTIALS")
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret_str("VERTEXAI_API_BASE")
+            or get_secret_str("VERTEX_API_BASE")
+        )
+
         if (
             "image" in optional_params
             or "video" in optional_params

@@ -3715,6 +3730,8 @@ def embedding(  # noqa: PLR0915
                 aembedding=aembedding,
                 print_verbose=print_verbose,
                 custom_llm_provider="vertex_ai",
+                client=client,
+                api_base=api_base,
             )
         else:
             response = vertex_embedding.embedding(

@@ -3732,6 +3749,8 @@ def embedding(  # noqa: PLR0915
                 aembedding=aembedding,
                 print_verbose=print_verbose,
                 api_key=api_key,
+                api_base=api_base,
+                client=client,
             )
     elif custom_llm_provider == "oobabooga":
         response = oobabooga.embedding(

@@ -4694,6 +4713,14 @@ def image_generation(  # noqa: PLR0915
                 or optional_params.pop("vertex_ai_credentials", None)
                 or get_secret_str("VERTEXAI_CREDENTIALS")
             )

+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("VERTEXAI_API_BASE")
+                or get_secret_str("VERTEX_API_BASE")
+            )
+
             model_response = vertex_image_generation.image_generation(
                 model=model,
                 prompt=prompt,

@@ -4705,6 +4732,8 @@ def image_generation(  # noqa: PLR0915
                 vertex_location=vertex_ai_location,
                 vertex_credentials=vertex_credentials,
                 aimg_generation=aimg_generation,
+                api_base=api_base,
+                client=client,
             )
         elif (
             custom_llm_provider in litellm._custom_providers

@@ -5066,6 +5095,12 @@ def transcription(
     response: Optional[
         Union[TranscriptionResponse, Coroutine[Any, Any, TranscriptionResponse]]
     ] = None

+    provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
+        model=model,
+        provider=LlmProviders(custom_llm_provider),
+    )
+
     if custom_llm_provider == "azure":
         # azure configs
         api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE")

@@ -5132,12 +5167,15 @@ def transcription(
             max_retries=max_retries,
             api_base=api_base,
             api_key=api_key,
+            provider_config=provider_config,
+            litellm_params=litellm_params_dict,
         )
     elif custom_llm_provider == "deepgram":
         response = base_llm_http_handler.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
+            litellm_params=litellm_params_dict,
             model_response=model_response,
             atranscription=atranscription,
             client=(

@@ -5156,6 +5194,7 @@ def transcription(
             api_key=api_key,
             custom_llm_provider="deepgram",
             headers={},
+            provider_config=provider_config,
         )
     if response is None:
         raise ValueError("Unmapped provider passed in. Unable to get the response.")
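Example (not part of the diff) — with the provider-config wiring above, a gpt-4o-transcribe request is routed through the new GPT transcription config; the file name is a hypothetical local path and an OPENAI_API_KEY is assumed to be set:

# Sketch only: standard litellm.transcription call, shown here to illustrate
# which model the new provider_config path applies to.
import litellm

with open("sample.wav", "rb") as audio_file:
    transcript = litellm.transcription(
        model="gpt-4o-transcribe",
        file=audio_file,
        language="en",
    )
print(transcript.text)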
@@ -1176,21 +1176,40 @@
         "output_cost_per_pixel": 0.0,
         "litellm_provider": "openai"
     },
+    "gpt-4o-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.000006,
+        "output_cost_per_token": 0.00001,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
+    "gpt-4o-mini-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_audio_token": 0.000003,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
     "whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0.0001,
         "output_cost_per_second": 0.0001,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
     },
     "tts-1": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000015,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "tts-1-hd": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000030,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,

@@ -4595,6 +4614,28 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini-2.0-flash-lite": {
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 50,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
+        "supports_tool_choice": true
+    },
     "gemini/gemini-2.0-pro-exp-02-05": {
         "max_tokens": 8192,
         "max_input_tokens": 2097152,

@@ -4658,6 +4699,30 @@
         "supports_tool_choice": true,
         "source": "https://ai.google.dev/pricing#2_0flash"
     },
+    "gemini/gemini-2.0-flash-lite": {
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 50,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "tpm": 4000000,
+        "rpm": 4000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,

@@ -5153,6 +5218,29 @@
         "supports_function_calling": true,
         "supports_tool_choice": true
     },
+    "vertex_ai/mistral-small-2503@001": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
+    "vertex_ai/mistral-small-2503": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true
+    },
     "vertex_ai/jamba-1.5-mini@001": {
         "max_tokens": 256000,
         "max_input_tokens": 256000,

@@ -5304,6 +5392,23 @@
         "mode": "embedding",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
     },
+    "multimodalembedding": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_character": 0.0000002,
+        "input_cost_per_image": 0.0001,
+        "input_cost_per_video_per_second": 0.0005,
+        "input_cost_per_video_per_second_above_8s_interval": 0.0010,
+        "input_cost_per_video_per_second_above_15s_interval": 0.0020,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "supported_endpoints": ["/v1/embeddings"],
+        "supported_modalities": ["text", "image", "video"],
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
     "text-embedding-large-exp-03-07": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
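Example (not part of the diff) — a worked cost calculation using the new gpt-4o-transcribe prices above; the token counts are hypothetical:

# Worked example: text input at 0.0000025/token, audio input at 0.000006/token,
# output at 0.00001/token, per the JSON entry added above.
text_in, audio_in, out = 20, 1_500, 200  # hypothetical token counts
cost = text_in * 0.0000025 + audio_in * 0.000006 + out * 0.00001
print(f"${cost:.6f}")  # $0.011050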
litellm/proxy/_experimental/mcp_server/server.py (new file, 123 lines)
@@ -0,0 +1,123 @@
"""
LiteLLM MCP Server Routes
"""

import asyncio
from typing import Any, Dict, List, Union

from anyio import BrokenResourceError
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import ValidationError

from litellm._logging import verbose_logger

# Check if MCP is available
# "mcp" requires python 3.10 or higher, but several litellm users use python 3.8
# We're making this conditional import to avoid breaking users who use python 3.8.
try:
    from mcp.server import Server

    MCP_AVAILABLE = True
except ImportError as e:
    verbose_logger.debug(f"MCP module not found: {e}")
    MCP_AVAILABLE = False
    router = APIRouter(
        prefix="/mcp",
        tags=["mcp"],
    )


if MCP_AVAILABLE:
    from mcp.server import NotificationOptions, Server
    from mcp.server.models import InitializationOptions
    from mcp.types import EmbeddedResource as MCPEmbeddedResource
    from mcp.types import ImageContent as MCPImageContent
    from mcp.types import TextContent as MCPTextContent
    from mcp.types import Tool as MCPTool

    from .sse_transport import SseServerTransport
    from .tool_registry import global_mcp_tool_registry

    ########################################################
    ############ Initialize the MCP Server #################
    ########################################################
    router = APIRouter(
        prefix="/mcp",
        tags=["mcp"],
    )
    server: Server = Server("litellm-mcp-server")
    sse: SseServerTransport = SseServerTransport("/mcp/sse/messages")

    ########################################################
    ############### MCP Server Routes #######################
    ########################################################

    @server.list_tools()
    async def list_tools() -> list[MCPTool]:
        """
        List all available tools
        """
        tools = []
        for tool in global_mcp_tool_registry.list_tools():
            tools.append(
                MCPTool(
                    name=tool.name,
                    description=tool.description,
                    inputSchema=tool.input_schema,
                )
            )

        return tools

    @server.call_tool()
    async def handle_call_tool(
        name: str, arguments: Dict[str, Any] | None
    ) -> List[Union[MCPTextContent, MCPImageContent, MCPEmbeddedResource]]:
        """
        Call a specific tool with the provided arguments
        """
        tool = global_mcp_tool_registry.get_tool(name)
        if not tool:
            raise HTTPException(status_code=404, detail=f"Tool '{name}' not found")
        if arguments is None:
            raise HTTPException(
                status_code=400, detail="Request arguments are required"
            )

        try:
            result = tool.handler(**arguments)
            return [MCPTextContent(text=str(result), type="text")]
        except Exception as e:
            return [MCPTextContent(text=f"Error: {str(e)}", type="text")]

    @router.get("/", response_class=StreamingResponse)
    async def handle_sse(request: Request):
        verbose_logger.info("new incoming SSE connection established")
        async with sse.connect_sse(request) as streams:
            try:
                await server.run(streams[0], streams[1], options)
            except BrokenResourceError:
                pass
            except asyncio.CancelledError:
                pass
            except ValidationError:
                pass
            except Exception:
                raise
        await request.close()

    @router.post("/sse/messages")
    async def handle_messages(request: Request):
        verbose_logger.info("incoming SSE message received")
        await sse.handle_post_message(request.scope, request.receive, request._send)
        await request.close()

    options = InitializationOptions(
        server_name="litellm-mcp-server",
        server_version="0.1.0",
        capabilities=server.get_capabilities(
            notification_options=NotificationOptions(),
            experimental_capabilities={},
        ),
    )
150  litellm/proxy/_experimental/mcp_server/sse_transport.py  Normal file
@ -0,0 +1,150 @@
"""
This is a modification of code from: https://github.com/SecretiveShell/MCP-Bridge/blob/master/mcp_bridge/mcp_server/sse_transport.py

Credit to the maintainers of SecretiveShell for their SSE Transport implementation

"""

from contextlib import asynccontextmanager
from typing import Any
from urllib.parse import quote
from uuid import UUID, uuid4

import anyio
import mcp.types as types
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
from fastapi.requests import Request
from fastapi.responses import Response
from pydantic import ValidationError
from sse_starlette import EventSourceResponse
from starlette.types import Receive, Scope, Send

from litellm._logging import verbose_logger


class SseServerTransport:
    """
    SSE server transport for MCP. This class provides _two_ ASGI applications,
    suitable to be used with a framework like Starlette and a server like Hypercorn:

    1. connect_sse() is an ASGI application which receives incoming GET requests,
       and sets up a new SSE stream to send server messages to the client.
    2. handle_post_message() is an ASGI application which receives incoming POST
       requests, which should contain client messages that link to a
       previously-established SSE session.
    """

    _endpoint: str
    _read_stream_writers: dict[
        UUID, MemoryObjectSendStream[types.JSONRPCMessage | Exception]
    ]

    def __init__(self, endpoint: str) -> None:
        """
        Creates a new SSE server transport, which will direct the client to POST
        messages to the relative or absolute URL given.
        """

        super().__init__()
        self._endpoint = endpoint
        self._read_stream_writers = {}
        verbose_logger.debug(
            f"SseServerTransport initialized with endpoint: {endpoint}"
        )

    @asynccontextmanager
    async def connect_sse(self, request: Request):
        if request.scope["type"] != "http":
            verbose_logger.error("connect_sse received non-HTTP request")
            raise ValueError("connect_sse can only handle HTTP requests")

        verbose_logger.debug("Setting up SSE connection")
        read_stream: MemoryObjectReceiveStream[types.JSONRPCMessage | Exception]
        read_stream_writer: MemoryObjectSendStream[types.JSONRPCMessage | Exception]

        write_stream: MemoryObjectSendStream[types.JSONRPCMessage]
        write_stream_reader: MemoryObjectReceiveStream[types.JSONRPCMessage]

        read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
        write_stream, write_stream_reader = anyio.create_memory_object_stream(0)

        session_id = uuid4()
        session_uri = f"{quote(self._endpoint)}?session_id={session_id.hex}"
        self._read_stream_writers[session_id] = read_stream_writer
        verbose_logger.debug(f"Created new session with ID: {session_id}")

        sse_stream_writer: MemoryObjectSendStream[dict[str, Any]]
        sse_stream_reader: MemoryObjectReceiveStream[dict[str, Any]]
        sse_stream_writer, sse_stream_reader = anyio.create_memory_object_stream(
            0, dict[str, Any]
        )

        async def sse_writer():
            verbose_logger.debug("Starting SSE writer")
            async with sse_stream_writer, write_stream_reader:
                await sse_stream_writer.send({"event": "endpoint", "data": session_uri})
                verbose_logger.debug(f"Sent endpoint event: {session_uri}")

                async for message in write_stream_reader:
                    verbose_logger.debug(f"Sending message via SSE: {message}")
                    await sse_stream_writer.send(
                        {
                            "event": "message",
                            "data": message.model_dump_json(
                                by_alias=True, exclude_none=True
                            ),
                        }
                    )

        async with anyio.create_task_group() as tg:
            response = EventSourceResponse(
                content=sse_stream_reader, data_sender_callable=sse_writer
            )
            verbose_logger.debug("Starting SSE response task")
            tg.start_soon(response, request.scope, request.receive, request._send)

            verbose_logger.debug("Yielding read and write streams")
            yield (read_stream, write_stream)

    async def handle_post_message(
        self, scope: Scope, receive: Receive, send: Send
    ) -> Response:
        verbose_logger.debug("Handling POST message")
        request = Request(scope, receive)

        session_id_param = request.query_params.get("session_id")
        if session_id_param is None:
            verbose_logger.warning("Received request without session_id")
            response = Response("session_id is required", status_code=400)
            return response

        try:
            session_id = UUID(hex=session_id_param)
            verbose_logger.debug(f"Parsed session ID: {session_id}")
        except ValueError:
            verbose_logger.warning(f"Received invalid session ID: {session_id_param}")
            response = Response("Invalid session ID", status_code=400)
            return response

        writer = self._read_stream_writers.get(session_id)
        if not writer:
            verbose_logger.warning(f"Could not find session for ID: {session_id}")
            response = Response("Could not find session", status_code=404)
            return response

        json = await request.json()
        verbose_logger.debug(f"Received JSON: {json}")

        try:
            message = types.JSONRPCMessage.model_validate(json)
            verbose_logger.debug(f"Validated client message: {message}")
        except ValidationError as err:
            verbose_logger.error(f"Failed to parse message: {err}")
            response = Response("Could not parse message", status_code=400)
            await writer.send(err)
            return response

        verbose_logger.debug(f"Sending message to writer: {message}")
        response = Response("Accepted", status_code=202)
        await writer.send(message)
        return response
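
A minimal sketch (not part of this diff) of how this transport is meant to be mounted; it mirrors the route handlers earlier in the diff, but the endpoint path, router prefix, and the placeholder for a real MCP Server loop are assumptions.

# Sketch only -- paths and the stand-in for `server.run(...)` are assumptions.
from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse

from litellm._logging import verbose_logger
from litellm.proxy._experimental.mcp_server.sse_transport import SseServerTransport

router = APIRouter(prefix="/mcp")
sse = SseServerTransport("/mcp/sse/messages")  # URL clients POST messages back to


@router.get("/", response_class=StreamingResponse)
async def sse_endpoint(request: Request):
    # connect_sse() registers a session, starts the EventSourceResponse,
    # and yields (read_stream, write_stream) for an MCP server loop,
    # e.g. `await server.run(read_stream, write_stream, options)`.
    async with sse.connect_sse(request) as (read_stream, write_stream):
        async for incoming in read_stream:  # placeholder for server.run(...)
            verbose_logger.debug(f"client message: {incoming}")


@router.post("/sse/messages")
async def post_endpoint(request: Request):
    # Forwards the POSTed JSON-RPC message to the SSE session identified
    # by the ?session_id= query parameter.
    return await sse.handle_post_message(request.scope, request.receive, request._send)
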
103  litellm/proxy/_experimental/mcp_server/tool_registry.py  Normal file
@ -0,0 +1,103 @@
import json
from typing import Any, Callable, Dict, List, Optional

from litellm._logging import verbose_logger
from litellm.proxy.types_utils.utils import get_instance_fn
from litellm.types.mcp_server.tool_registry import MCPTool


class MCPToolRegistry:
    """
    A registry for managing MCP tools
    """

    def __init__(self):
        # Registry to store all registered tools
        self.tools: Dict[str, MCPTool] = {}

    def register_tool(
        self,
        name: str,
        description: str,
        input_schema: Dict[str, Any],
        handler: Callable,
    ) -> None:
        """
        Register a new tool in the registry
        """
        self.tools[name] = MCPTool(
            name=name,
            description=description,
            input_schema=input_schema,
            handler=handler,
        )
        verbose_logger.debug(f"Registered tool: {name}")

    def get_tool(self, name: str) -> Optional[MCPTool]:
        """
        Get a tool from the registry by name
        """
        return self.tools.get(name)

    def list_tools(self) -> List[MCPTool]:
        """
        List all registered tools
        """
        return list(self.tools.values())

    def load_tools_from_config(
        self, mcp_tools_config: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Load and register tools from the proxy config

        Args:
            mcp_tools_config: The mcp_tools config from the proxy config
        """
        if mcp_tools_config is None:
            raise ValueError(
                "mcp_tools_config is required, please set `mcp_tools` in your proxy config"
            )

        for tool_config in mcp_tools_config:
            if not isinstance(tool_config, dict):
                raise ValueError("mcp_tools_config must be a list of dictionaries")

            name = tool_config.get("name")
            description = tool_config.get("description")
            input_schema = tool_config.get("input_schema", {})
            handler_name = tool_config.get("handler")

            if not all([name, description, handler_name]):
                continue

            # Try to resolve the handler
            # First check if it's a module path (e.g., "module.submodule.function")
            if handler_name is None:
                raise ValueError(f"handler is required for tool {name}")
            handler = get_instance_fn(handler_name)

            if handler is None:
                verbose_logger.warning(
                    f"Warning: Could not find handler {handler_name} for tool {name}"
                )
                continue

            # Register the tool
            if name is None:
                raise ValueError(f"name is required for tool {name}")
            if description is None:
                raise ValueError(f"description is required for tool {name}")

            self.register_tool(
                name=name,
                description=description,
                input_schema=input_schema,
                handler=handler,
            )
        verbose_logger.debug(
            "all registered tools: %s", json.dumps(self.tools, indent=4, default=str)
        )


global_mcp_tool_registry = MCPToolRegistry()
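
A minimal sketch (not part of this diff) of a config shaped like what load_tools_from_config consumes; note the parameter is annotated Optional[Dict[str, Any]] but is iterated as a list of dicts, so a list is what the example passes. The handler string must be an importable path that get_instance_fn can resolve; "custom_tools.get_weather" is hypothetical.

# Illustrative config; "custom_tools.get_weather" is a hypothetical handler path.
from litellm.proxy._experimental.mcp_server.tool_registry import MCPToolRegistry

mcp_tools_config = [
    {
        "name": "get_weather",
        "description": "Return the current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        "handler": "custom_tools.get_weather",
    }
]

registry = MCPToolRegistry()
registry.load_tools_from_config(mcp_tools_config)
# Entries missing name/description/handler are skipped; handlers that cannot be
# imported are skipped with a warning, per the loop above.
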
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{96443:function(n,e,t){Promise.resolve().then(t.t.bind(t,39974,23)),Promise.resolve().then(t.t.bind(t,2778,23))},2778:function(){},39974:function(n){n.exports={style:{fontFamily:"'__Inter_cf7686', '__Inter_Fallback_cf7686'",fontStyle:"normal"},className:"__className_cf7686"}}},function(n){n.O(0,[919,986,971,117,744],function(){return n(n.s=96443)}),_N_E=n.O()}]);
|
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{6580:function(n,e,t){Promise.resolve().then(t.t.bind(t,39974,23)),Promise.resolve().then(t.t.bind(t,2778,23))},2778:function(){},39974:function(n){n.exports={style:{fontFamily:"'__Inter_cf7686', '__Inter_Fallback_cf7686'",fontStyle:"normal"},className:"__className_cf7686"}}},function(n){n.O(0,[919,986,971,117,744],function(){return n(n.s=6580)}),_N_E=n.O()}]);
|
|
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{11790:function(e,n,u){Promise.resolve().then(u.bind(u,52829))},52829:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(57437),s=u(2265),r=u(99376),c=u(92699);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[42,261,250,699,971,117,744],function(){return e(e.s=11790)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{21024:function(e,n,u){Promise.resolve().then(u.bind(u,52829))},52829:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(57437),s=u(2265),r=u(99376),c=u(92699);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[42,261,250,699,971,117,744],function(){return e(e.s=21024)}),_N_E=e.O()}]);
|
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{32922:function(e,t,n){Promise.resolve().then(n.bind(n,12011))},12011:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return S}});var s=n(57437),o=n(2265),a=n(99376),i=n(20831),c=n(94789),l=n(12514),r=n(49804),u=n(67101),d=n(84264),m=n(49566),h=n(96761),x=n(84566),p=n(19250),f=n(14474),k=n(13634),j=n(73002),g=n(3914);function S(){let[e]=k.Z.useForm(),t=(0,a.useSearchParams)();(0,g.e)("token");let n=t.get("invitation_id"),[S,_]=(0,o.useState)(null),[w,Z]=(0,o.useState)(""),[N,b]=(0,o.useState)(""),[T,v]=(0,o.useState)(null),[y,E]=(0,o.useState)(""),[C,U]=(0,o.useState)("");return(0,o.useEffect)(()=>{n&&(0,p.W_)(n).then(e=>{let t=e.login_url;console.log("login_url:",t),E(t);let n=e.token,s=(0,f.o)(n);U(n),console.log("decoded:",s),_(s.key),console.log("decoded user email:",s.user_email),b(s.user_email),v(s.user_id)})},[n]),(0,s.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,s.jsxs)(l.Z,{children:[(0,s.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,s.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,s.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,s.jsx)(c.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,s.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,s.jsx)(r.Z,{children:"SSO is under the Enterprise Tirer."}),(0,s.jsx)(r.Z,{children:(0,s.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,s.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,s.jsxs)(k.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",S,"token:",C,"formValues:",e),S&&C&&(e.user_email=N,T&&n&&(0,p.m_)(S,n,T,e.password).then(e=>{var t;let n="/ui/";n+="?userID="+((null===(t=e.data)||void 0===t?void 0:t.user_id)||e.user_id),document.cookie="token="+C,console.log("redirecting to:",n),window.location.href=n}))},children:[(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(k.Z.Item,{label:"Email Address",name:"user_email",children:(0,s.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,s.jsx)(k.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,s.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,s.jsx)("div",{className:"mt-10",children:(0,s.jsx)(j.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}},3914:function(e,t,n){"use strict";function s(){let e=window.location.hostname,t=["Lax","Strict","None"];["/","/ui"].forEach(n=>{document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,";"),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,";"),t.forEach(t=>{let s="None"===t?" Secure;":"";document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; SameSite=").concat(t,";").concat(s),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,"; SameSite=").concat(t,";").concat(s)})}),console.log("After clearing cookies:",document.cookie)}function o(e){let t=document.cookie.split("; ").find(t=>t.startsWith(e+"="));return t?t.split("=")[1]:null}n.d(t,{b:function(){return s},e:function(){return o}})}},function(e){e.O(0,[665,42,899,250,971,117,744],function(){return e(e.s=32922)}),_N_E=e.O()}]);
|
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{8672:function(e,t,n){Promise.resolve().then(n.bind(n,12011))},12011:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return S}});var s=n(57437),o=n(2265),a=n(99376),i=n(20831),c=n(94789),l=n(12514),r=n(49804),u=n(67101),d=n(84264),m=n(49566),h=n(96761),x=n(84566),p=n(19250),f=n(14474),k=n(13634),j=n(73002),g=n(3914);function S(){let[e]=k.Z.useForm(),t=(0,a.useSearchParams)();(0,g.e)("token");let n=t.get("invitation_id"),[S,_]=(0,o.useState)(null),[w,Z]=(0,o.useState)(""),[N,b]=(0,o.useState)(""),[T,v]=(0,o.useState)(null),[y,E]=(0,o.useState)(""),[C,U]=(0,o.useState)("");return(0,o.useEffect)(()=>{n&&(0,p.W_)(n).then(e=>{let t=e.login_url;console.log("login_url:",t),E(t);let n=e.token,s=(0,f.o)(n);U(n),console.log("decoded:",s),_(s.key),console.log("decoded user email:",s.user_email),b(s.user_email),v(s.user_id)})},[n]),(0,s.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,s.jsxs)(l.Z,{children:[(0,s.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,s.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,s.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,s.jsx)(c.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,s.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,s.jsx)(r.Z,{children:"SSO is under the Enterprise Tirer."}),(0,s.jsx)(r.Z,{children:(0,s.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,s.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,s.jsxs)(k.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",S,"token:",C,"formValues:",e),S&&C&&(e.user_email=N,T&&n&&(0,p.m_)(S,n,T,e.password).then(e=>{var t;let n="/ui/";n+="?userID="+((null===(t=e.data)||void 0===t?void 0:t.user_id)||e.user_id),document.cookie="token="+C,console.log("redirecting to:",n),window.location.href=n}))},children:[(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(k.Z.Item,{label:"Email Address",name:"user_email",children:(0,s.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,s.jsx)(k.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,s.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,s.jsx)("div",{className:"mt-10",children:(0,s.jsx)(j.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}},3914:function(e,t,n){"use strict";function s(){let e=window.location.hostname,t=["Lax","Strict","None"];["/","/ui"].forEach(n=>{document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,";"),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,";"),t.forEach(t=>{let s="None"===t?" Secure;":"";document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; SameSite=").concat(t,";").concat(s),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,"; SameSite=").concat(t,";").concat(s)})}),console.log("After clearing cookies:",document.cookie)}function o(e){let t=document.cookie.split("; ").find(t=>t.startsWith(e+"="));return t?t.split("=")[1]:null}n.d(t,{b:function(){return s},e:function(){return o}})}},function(e){e.O(0,[665,42,899,250,971,117,744],function(){return e(e.s=8672)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{20169:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(20169)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{10264:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(10264)}),_N_E=e.O()}]);
|
|
@ -0,0 +1,5 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg width="46" height="46" viewBox="0 0 46 46" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<circle cx="23" cy="23" r="23" fill="white"/>
|
||||||
|
<path d="M32.73 7h-6.945L38.45 39h6.945L32.73 7ZM12.665 7 0 39h7.082l2.59-6.72h13.25l2.59 6.72h7.082L19.929 7h-7.264Zm-.702 19.337 4.334-11.246 4.334 11.246h-8.668Z" fill="#000000"></path>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 381 B |
After Width: | Height: | Size: 414 B |
34  litellm/proxy/_experimental/out/assets/logos/aws.svg  Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.0" id="katman_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 600 450" style="enable-background:new 0 0 600 450;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill:none;}
|
||||||
|
.st1{fill-rule:evenodd;clip-rule:evenodd;fill:#343B45;}
|
||||||
|
.st2{fill-rule:evenodd;clip-rule:evenodd;fill:#F4981A;}
|
||||||
|
</style>
|
||||||
|
<g id="_x31__stroke">
|
||||||
|
<g id="Amazon_1_">
|
||||||
|
<rect x="161.2" y="86.5" class="st0" width="277.8" height="277.8"/>
|
||||||
|
<g id="Amazon">
|
||||||
|
<path class="st1" d="M315,163.7c-8,0.6-17.2,1.2-26.4,2.4c-14.1,1.9-28.2,4.3-39.8,9.8c-22.7,9.2-38,28.8-38,57.6
|
||||||
|
c0,36.2,23.3,54.6,52.7,54.6c9.8,0,17.8-1.2,25.1-3.1c11.7-3.7,21.5-10.4,33.1-22.7c6.7,9.2,8.6,13.5,20.2,23.3
|
||||||
|
c3.1,1.2,6.1,1.2,8.6-0.6c7.4-6.1,20.3-17.2,27-23.3c3.1-2.5,2.5-6.1,0.6-9.2c-6.7-8.6-13.5-16-13.5-32.5V165
|
||||||
|
c0-23.3,1.9-44.8-15.3-60.7c-14.1-12.9-36.2-17.8-53.4-17.8h-7.4c-31.2,1.8-64.3,15.3-71.7,54c-1.2,4.9,2.5,6.8,4.9,7.4l34.3,4.3
|
||||||
|
c3.7-0.6,5.5-3.7,6.1-6.7c3.1-13.5,14.1-20.2,26.3-21.5h2.5c7.4,0,15.3,3.1,19.6,9.2c4.9,7.4,4.3,17.2,4.3,25.8L315,163.7
|
||||||
|
L315,163.7z M308.2,236.7c-4.3,8.6-11.7,14.1-19.6,16c-1.2,0-3.1,0.6-4.9,0.6c-13.5,0-21.4-10.4-21.4-25.8
|
||||||
|
c0-19.6,11.6-28.8,26.3-33.1c8-1.8,17.2-2.5,26.4-2.5v7.4C315,213.4,315.6,224.4,308.2,236.7z"/>
|
||||||
|
<path class="st2" d="M398.8,311.4c-1.4,0-2.8,0.3-4.1,0.9c-1.5,0.6-3,1.3-4.4,1.9l-2.1,0.9l-2.7,1.1v0
|
||||||
|
c-29.8,12.1-61.1,19.2-90.1,19.8c-1.1,0-2.1,0-3.2,0c-45.6,0-82.8-21.1-120.3-42c-1.3-0.7-2.7-1-4-1c-1.7,0-3.4,0.6-4.7,1.8
|
||||||
|
c-1.3,1.2-2,2.9-2,4.7c0,2.3,1.2,4.4,2.9,5.7c35.2,30.6,73.8,59,125.7,59c1,0,2,0,3.1,0c33-0.7,70.3-11.9,99.3-30.1l0.2-0.1
|
||||||
|
c3.8-2.3,7.6-4.9,11.2-7.7c2.2-1.6,3.8-4.2,3.8-6.9C407.2,314.6,403.2,311.4,398.8,311.4z M439,294.5L439,294.5
|
||||||
|
c-0.1-2.9-0.7-5.1-1.9-6.9l-0.1-0.2l-0.1-0.2c-1.2-1.3-2.4-1.8-3.7-2.4c-3.8-1.5-9.3-2.3-16-2.3c-4.8,0-10.1,0.5-15.4,1.6l0-0.4
|
||||||
|
l-5.3,1.8l-0.1,0l-3,1v0.1c-3.5,1.5-6.8,3.3-9.8,5.5c-1.9,1.4-3.4,3.2-3.5,6.1c0,1.5,0.7,3.3,2,4.3c1.3,1,2.8,1.4,4.1,1.4
|
||||||
|
c0.3,0,0.6,0,0.9-0.1l0.3,0l0.2,0c2.6-0.6,6.4-0.9,10.9-1.6c3.8-0.4,7.9-0.7,11.4-0.7c2.5,0,4.7,0.2,6.3,0.5
|
||||||
|
c0.8,0.2,1.3,0.4,1.6,0.5c0.1,0,0.2,0.1,0.2,0.1c0.1,0.2,0.2,0.8,0.1,1.5c0,2.9-1.2,8.4-2.9,13.7c-1.7,5.3-3.7,10.7-5,14.2
|
||||||
|
c-0.3,0.8-0.5,1.7-0.5,2.7c0,1.4,0.6,3.2,1.8,4.3c1.2,1.1,2.8,1.6,4.1,1.6h0.1c2,0,3.6-0.8,5.1-1.9
|
||||||
|
c13.6-12.2,18.3-31.7,18.5-42.6L439,294.5z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 2.5 KiB |
1  litellm/proxy/_experimental/out/assets/logos/bedrock.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Bedrock</title><defs><linearGradient id="lobe-icons-bedrock-fill" x1="80%" x2="20%" y1="20%" y2="80%"><stop offset="0%" stop-color="#6350FB"></stop><stop offset="50%" stop-color="#3D8FFF"></stop><stop offset="100%" stop-color="#9AD8F8"></stop></linearGradient></defs><path d="M13.05 15.513h3.08c.214 0 .389.177.389.394v1.82a1.704 1.704 0 011.296 1.661c0 .943-.755 1.708-1.685 1.708-.931 0-1.686-.765-1.686-1.708 0-.807.554-1.484 1.297-1.662v-1.425h-2.69v4.663a.395.395 0 01-.188.338l-2.69 1.641a.385.385 0 01-.405-.002l-4.926-3.086a.395.395 0 01-.185-.336V16.3L2.196 14.87A.395.395 0 012 14.555L2 14.528V9.406c0-.14.073-.27.192-.34l2.465-1.462V4.448c0-.129.062-.249.165-.322l.021-.014L9.77 1.058a.385.385 0 01.407 0l2.69 1.675a.395.395 0 01.185.336V7.6h3.856V5.683a1.704 1.704 0 01-1.296-1.662c0-.943.755-1.708 1.685-1.708.931 0 1.685.765 1.685 1.708 0 .807-.553 1.484-1.296 1.662v2.311a.391.391 0 01-.389.394h-4.245v1.806h6.624a1.69 1.69 0 011.64-1.313c.93 0 1.685.764 1.685 1.707 0 .943-.754 1.708-1.685 1.708a1.69 1.69 0 01-1.64-1.314H13.05v1.937h4.953l.915 1.18a1.66 1.66 0 01.84-.227c.931 0 1.685.764 1.685 1.707 0 .943-.754 1.708-1.685 1.708-.93 0-1.685-.765-1.685-1.708 0-.346.102-.668.276-.937l-.724-.935H13.05v1.806zM9.973 1.856L7.93 3.122V6.09h-.778V3.604L5.435 4.669v2.945l2.11 1.36L9.712 7.61V5.334h.778V7.83c0 .136-.07.263-.184.335L7.963 9.638v2.081l1.422 1.009-.446.646-1.406-.998-1.53 1.005-.423-.66 1.605-1.055v-1.99L5.038 8.29l-2.26 1.34v1.676l1.972-1.189.398.677-2.37 1.429V14.3l2.166 1.258 2.27-1.368.397.677-2.176 1.311V19.3l1.876 1.175 2.365-1.426.398.678-2.017 1.216 1.918 1.201 2.298-1.403v-5.78l-4.758 2.893-.4-.675 5.158-3.136V3.289L9.972 1.856zM16.13 18.47a.913.913 0 00-.908.92c0 .507.406.918.908.918a.913.913 0 00.907-.919.913.913 0 00-.907-.92zm3.63-3.81a.913.913 0 00-.908.92c0 .508.406.92.907.92a.913.913 0 00.908-.92.913.913 0 00-.908-.92zm1.555-4.99a.913.913 0 00-.908.92c0 .507.407.918.908.918a.913.913 0 00.907-.919.913.913 0 00-.907-.92zM17.296 3.1a.913.913 0 00-.907.92c0 .508.406.92.907.92a.913.913 0 00.908-.92.913.913 0 00-.908-.92z" fill="url(#lobe-icons-bedrock-fill)" fill-rule="nonzero"></path></svg>
|
After Width: | Height: | Size: 2.2 KiB |
89  litellm/proxy/_experimental/out/assets/logos/cerebras.svg  Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.0" id="katman_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 800 600" style="enable-background:new 0 0 800 600;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill-rule:evenodd;clip-rule:evenodd;fill:#F05A28;}
|
||||||
|
.st1{fill-rule:evenodd;clip-rule:evenodd;fill:#231F20;}
|
||||||
|
</style>
|
||||||
|
<g id="Contact">
|
||||||
|
<g id="Contact-us" transform="translate(-234.000000, -1114.000000)">
|
||||||
|
<g id="map" transform="translate(-6.000000, 1027.000000)">
|
||||||
|
<g id="Contact-box" transform="translate(190.000000, 36.000000)">
|
||||||
|
<g id="Group-26" transform="translate(50.000000, 51.000000)">
|
||||||
|
<g id="Group-3">
|
||||||
|
<path id="Fill-1" class="st0" d="M220.9,421c-17,0-33.1-3.4-47.8-9.5c-22-9.2-40.8-24.6-54.1-44c-13.3-19.4-21-42.7-21-67.9
|
||||||
|
c0-16.8,3.4-32.7,9.7-47.3c9.3-21.8,24.9-40.3,44.5-53.4c19.6-13.1,43.2-20.7,68.7-20.7v-18.3c-19.5,0-38.1,3.9-55.1,11
|
||||||
|
c-25.4,10.6-47,28.3-62.2,50.6c-15.3,22.3-24.2,49.2-24.2,78.1c0,19.3,4,37.7,11.1,54.4c10.7,25.1,28.7,46.4,51.2,61.5
|
||||||
|
c22.6,15.1,49.8,23.9,79.1,23.9V421z"/>
|
||||||
|
<path id="Fill-4" class="st0" d="M157.9,374.1c-11.5-9.6-20.1-21.2-25.9-33.9c-5.8-12.7-8.8-26.4-8.8-40.2
|
||||||
|
c0-11,1.9-22,5.6-32.5c3.8-10.5,9.4-20.5,17.1-29.6c9.6-11.4,21.3-20,34-25.8c12.7-5.8,26.6-8.7,40.4-8.7
|
||||||
|
c11,0,22.1,1.9,32.6,5.6c10.6,3.8,20.6,9.4,29.7,17l11.9-14.1c-10.8-9-22.8-15.8-35.4-20.2c-12.6-4.5-25.7-6.7-38.8-6.7
|
||||||
|
c-16.5,0-32.9,3.5-48.1,10.4c-15.2,6.9-29.1,17.2-40.5,30.7c-9.1,10.8-15.8,22.7-20.3,35.2c-4.5,12.5-6.7,25.6-6.7,38.7
|
||||||
|
c0,16.4,3.5,32.8,10.4,47.9c6.9,15.1,17.3,29,30.9,40.3L157.9,374.1z"/>
|
||||||
|
<path id="Fill-6" class="st0" d="M186.4,362.2c-12.1-6.4-21.6-15.7-28.1-26.6c-6.5-10.9-9.9-23.5-9.9-36.2
|
||||||
|
c0-11.2,2.6-22.5,8.3-33c6.4-12.1,15.8-21.5,26.8-27.9c11-6.5,23.6-9.9,36.4-9.9c11.2,0,22.6,2.6,33.2,8.2l8.6-16.3
|
||||||
|
c-13.3-7-27.7-10.4-41.9-10.3c-16.1,0-32,4.3-45.8,12.4c-13.8,8.1-25.7,20.1-33.7,35.2c-7,13.3-10.4,27.6-10.4,41.6
|
||||||
|
c0,16,4.3,31.8,12.5,45.5c8.2,13.8,20.2,25.5,35.4,33.5L186.4,362.2z"/>
|
||||||
|
<path id="Fill-8" class="st0" d="M221,344.6c-6.3,0-12.3-1.3-17.7-3.6c-8.2-3.4-15.1-9.2-20-16.5c-4.9-7.3-7.8-16-7.8-25.4
|
||||||
|
c0-6.3,1.3-12.3,3.6-17.7c3.4-8.1,9.2-15.1,16.5-20c7.3-4.9,16-7.8,25.4-7.8v-18.4c-8.8,0-17.2,1.8-24.9,5
|
||||||
|
c-11.5,4.9-21.2,12.9-28.1,23.1C161,273.6,157,286,157,299.2c0,8.8,1.8,17.2,5,24.9c4.9,11.5,13,21.2,23.2,28.1
|
||||||
|
C195.4,359,207.7,363,221,363V344.6z"/>
|
||||||
|
</g>
|
||||||
|
<g id="Group" transform="translate(22.000000, 13.000000)">
|
||||||
|
<path id="Fill-10" class="st1" d="M214,271.6c-2.1-2.2-4.4-4-6.7-5.3c-2.3-1.3-4.7-2-7.2-2c-3.4,0-6.3,0.6-9,1.8
|
||||||
|
c-2.6,1.2-4.9,2.8-6.8,4.9c-1.9,2-3.3,4.4-4.3,7c-1,2.6-1.4,5.4-1.4,8.2c0,2.8,0.5,5.6,1.4,8.2c1,2.6,2.4,5,4.3,7
|
||||||
|
c1.9,2,4.1,3.7,6.8,4.9c2.6,1.2,5.6,1.8,9,1.8c2.8,0,5.5-0.6,7.9-1.7c2.4-1.2,4.5-2.9,6.2-5.1l12.2,13.1
|
||||||
|
c-1.8,1.8-3.9,3.4-6.3,4.7c-2.4,1.3-4.8,2.4-7.2,3.2s-4.8,1.4-7,1.7c-2.2,0.4-4.2,0.5-5.8,0.5c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c4.7,0,9.4,0.9,14.1,2.7
|
||||||
|
c4.7,1.8,8.9,4.6,12.4,8.4L214,271.6z"/>
|
||||||
|
<path id="Fill-12" class="st1" d="M280.4,278.9c-0.1-5.4-1.8-9.6-5-12.7c-3.3-3.1-7.8-4.6-13.6-4.6c-5.5,0-9.8,1.6-13,4.7
|
||||||
|
c-3.2,3.1-5.2,7.4-5.9,12.6H280.4z M243,292.6c0.6,5.5,2.7,9.7,6.4,12.8c3.7,3,8.1,4.6,13.3,4.6c4.6,0,8.4-0.9,11.5-2.8
|
||||||
|
c3.1-1.9,5.8-4.2,8.2-7.1l13.1,9.9c-4.3,5.3-9,9-14.3,11.3c-5.3,2.2-10.8,3.3-16.6,3.3c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c5.1,0,9.7,0.9,13.9,2.7
|
||||||
|
c4.2,1.8,7.8,4.3,10.8,7.7c3,3.3,5.3,7.5,7,12.4c1.7,4.9,2.5,10.6,2.5,17v5H243z"/>
|
||||||
|
<path id="Fill-14" class="st1" d="M306.5,249.7h18.3v11.5h0.3c2-4.3,4.9-7.5,8.7-9.9c3.8-2.3,8.1-3.5,12.9-3.5
|
||||||
|
c1.1,0,2.2,0.1,3.3,0.3c1.1,0.2,2.2,0.5,3.3,0.8v17.6c-1.5-0.4-3-0.7-4.5-1c-1.5-0.3-2.9-0.4-4.3-0.4c-4.3,0-7.7,0.8-10.3,2.4
|
||||||
|
c-2.6,1.6-4.6,3.4-5.9,5.4c-1.4,2-2.3,4.1-2.7,6.1c-0.5,2-0.7,3.5-0.7,4.6v39h-18.3V249.7z"/>
|
||||||
|
<path id="Fill-16" class="st1" d="M409,278.9c-0.1-5.4-1.8-9.6-5-12.7c-3.3-3.1-7.8-4.6-13.6-4.6c-5.5,0-9.8,1.6-13,4.7
|
||||||
|
c-3.2,3.1-5.2,7.4-5.9,12.6H409z M371.6,292.6c0.6,5.5,2.7,9.7,6.4,12.8c3.7,3,8.1,4.6,13.3,4.6c4.6,0,8.4-0.9,11.5-2.8
|
||||||
|
c3.1-1.9,5.8-4.2,8.2-7.1l13.1,9.9c-4.3,5.3-9,9-14.3,11.3c-5.3,2.2-10.8,3.3-16.6,3.3c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c5.1,0,9.7,0.9,13.9,2.7
|
||||||
|
c4.2,1.8,7.8,4.3,10.8,7.7c3,3.3,5.3,7.5,7,12.4c1.7,4.9,2.5,10.6,2.5,17v5H371.6z"/>
|
||||||
|
<path id="Fill-18" class="st1" d="M494.6,286.2c0-2.8-0.5-5.6-1.5-8.2c-1-2.6-2.4-5-4.3-7c-1.9-2-4.2-3.7-6.9-4.9
|
||||||
|
c-2.7-1.2-5.7-1.8-9.1-1.8c-3.4,0-6.4,0.6-9.1,1.8c-2.7,1.2-5,2.8-6.9,4.9c-1.9,2-3.3,4.4-4.3,7c-1,2.6-1.5,5.4-1.5,8.2
|
||||||
|
c0,2.8,0.5,5.6,1.5,8.2c1,2.6,2.4,5,4.3,7c1.9,2,4.2,3.7,6.9,4.9c2.7,1.2,5.7,1.8,9.1,1.8c3.4,0,6.4-0.6,9.1-1.8
|
||||||
|
c2.7-1.2,5-2.8,6.9-4.9c1.9-2,3.3-4.4,4.3-7C494.1,291.8,494.6,289,494.6,286.2L494.6,286.2z M433.2,207.6h18.5v51.3h0.5
|
||||||
|
c0.9-1.2,2.1-2.5,3.5-3.7c1.4-1.3,3.2-2.5,5.2-3.6c2.1-1.1,4.4-2,7.1-2.7c2.7-0.7,5.8-1.1,9.3-1.1c5.2,0,10.1,1,14.5,3
|
||||||
|
c4.4,2,8.2,4.7,11.3,8.1c3.1,3.5,5.6,7.5,7.3,12.2c1.7,4.7,2.6,9.7,2.6,15.1c0,5.4-0.8,10.4-2.5,15.1
|
||||||
|
c-1.6,4.7-4.1,8.7-7.2,12.2c-3.2,3.5-7,6.2-11.6,8.1c-4.5,2-9.6,3-15.3,3c-5.2,0-10.1-1-14.7-3c-4.5-2-8.1-5.3-10.8-9.7h-0.3
|
||||||
|
v11h-17.6V207.6z"/>
|
||||||
|
<path id="Fill-20" class="st1" d="M520.9,249.7h18.3v11.5h0.3c2-4.3,4.9-7.5,8.7-9.9c3.8-2.3,8.1-3.5,12.9-3.5
|
||||||
|
c1.1,0,2.2,0.1,3.3,0.3c1.1,0.2,2.2,0.5,3.3,0.8v17.6c-1.5-0.4-3-0.7-4.5-1c-1.5-0.3-2.9-0.4-4.3-0.4c-4.3,0-7.7,0.8-10.3,2.4
|
||||||
|
c-2.6,1.6-4.6,3.4-5.9,5.4c-1.4,2-2.3,4.1-2.7,6.1c-0.5,2-0.7,3.5-0.7,4.6v39h-18.3V249.7z"/>
|
||||||
|
<path id="Fill-22" class="st1" d="M616,290h-3.9c-2.6,0-5.5,0.1-8.7,0.3c-3.2,0.2-6.2,0.7-9.1,1.4c-2.8,0.8-5.2,1.9-7.2,3.3
|
||||||
|
c-2,1.5-2.9,3.5-2.9,6.2c0,1.7,0.4,3.2,1.2,4.3c0.8,1.2,1.8,2.2,3,3c1.2,0.8,2.6,1.4,4.2,1.8c1.5,0.4,3.1,0.5,4.6,0.5
|
||||||
|
c6.4,0,11.1-1.5,14.2-4.5c3-3,4.6-7.1,4.6-12.2V290z M617.1,312.7h-0.5c-2.7,4.2-6.1,7.2-10.2,9.1c-4.1,1.9-8.7,2.8-13.6,2.8
|
||||||
|
c-3.4,0-6.7-0.5-10-1.4s-6.1-2.3-8.7-4.1c-2.5-1.8-4.6-4.1-6.1-6.8s-2.3-5.9-2.3-9.6c0-4,0.7-7.3,2.2-10.1
|
||||||
|
c1.4-2.8,3.4-5.1,5.8-7c2.4-1.9,5.2-3.4,8.4-4.5c3.2-1.1,6.5-2,10-2.5c3.5-0.6,6.9-0.9,10.5-1.1c3.5-0.2,6.8-0.2,9.9-0.2h4.6
|
||||||
|
v-2c0-4.6-1.6-8-4.8-10.3c-3.2-2.3-7.3-3.4-12.2-3.4c-3.9,0-7.6,0.7-11,2.1c-3.4,1.4-6.4,3.2-8.8,5.6l-9.8-9.6
|
||||||
|
c4.1-4.2,9-7.1,14.5-9c5.5-1.8,11.2-2.7,17.1-2.7c5.3,0,9.7,0.6,13.3,1.7c3.6,1.2,6.6,2.7,9,4.5c2.4,1.8,4.2,3.9,5.5,6.3
|
||||||
|
c1.3,2.4,2.2,4.8,2.8,7.2c0.6,2.4,0.9,4.8,1,7.1c0.1,2.3,0.2,4.3,0.2,6v42h-16.7V312.7z"/>
|
||||||
|
<path id="Fill-24" class="st1" d="M683.6,269.9c-3.6-5-8.4-7.5-14.4-7.5c-2.5,0-4.9,0.6-7.2,1.8c-2.4,1.2-3.5,3.2-3.5,5.9
|
||||||
|
c0,2.2,1,3.9,2.9,4.9c1.9,1,4.4,1.9,7.4,2.6c3,0.7,6.2,1.4,9.6,2.2c3.4,0.8,6.6,1.9,9.6,3.5c3,1.6,5.4,3.7,7.4,6.5
|
||||||
|
c1.9,2.7,2.9,6.5,2.9,11.3c0,4.4-0.9,8-2.8,11c-1.9,3-4.3,5.4-7.4,7.2c-3,1.8-6.4,3.1-10.2,4c-3.8,0.8-7.6,1.2-11.3,1.2
|
||||||
|
c-5.7,0-11-0.8-15.8-2.4c-4.8-1.6-9.1-4.6-12.9-8.8l12.3-11.4c2.4,2.6,4.9,4.8,7.6,6.5c2.7,1.7,6,2.5,9.9,2.5
|
||||||
|
c1.3,0,2.7-0.2,4.1-0.5c1.4-0.3,2.8-0.8,4-1.5c1.2-0.7,2.2-1.6,3-2.7c0.8-1.1,1.1-2.3,1.1-3.7c0-2.5-1-4.4-2.9-5.6
|
||||||
|
c-1.9-1.2-4.4-2.2-7.4-3c-3-0.8-6.2-1.5-9.6-2.1c-3.4-0.7-6.6-1.7-9.6-3.2c-3-1.5-5.4-3.5-7.4-6.2c-1.9-2.6-2.9-6.3-2.9-11
|
||||||
|
c0-4.1,0.8-7.6,2.5-10.6c1.7-3,3.9-5.4,6.7-7.4c2.8-1.9,5.9-3.3,9.5-4.3c3.6-0.9,7.2-1.4,10.9-1.4c4.9,0,9.8,0.8,14.6,2.5
|
||||||
|
c4.8,1.7,8.7,4.5,11.7,8.6L683.6,269.9z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 8 KiB |
1  litellm/proxy/_experimental/out/assets/logos/cohere.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" xml:space="preserve" style="enable-background:new 0 0 75 75" viewBox="0 0 75 75" width="75" height="75" ><path d="M24.3 44.7c2 0 6-.1 11.6-2.4 6.5-2.7 19.3-7.5 28.6-12.5 6.5-3.5 9.3-8.1 9.3-14.3C73.8 7 66.9 0 58.3 0h-36C10 0 0 10 0 22.3s9.4 22.4 24.3 22.4z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#39594d"/><path d="M30.4 60c0-6 3.6-11.5 9.2-13.8l11.3-4.7C62.4 36.8 75 45.2 75 57.6 75 67.2 67.2 75 57.6 75H45.3c-8.2 0-14.9-6.7-14.9-15z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#d18ee2"/><path d="M12.9 47.6C5.8 47.6 0 53.4 0 60.5v1.7C0 69.2 5.8 75 12.9 75c7.1 0 12.9-5.8 12.9-12.9v-1.7c-.1-7-5.8-12.8-12.9-12.8z" style="fill:#ff7759"/></svg>
|
After Width: | Height: | Size: 742 B |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>DBRX</title><path d="M21.821 9.894l-9.81 5.595L1.505 9.511 1 9.787v4.34l11.01 6.256 9.811-5.574v2.297l-9.81 5.596-10.506-5.979L1 17v.745L12.01 24 23 17.745v-4.34l-.505-.277-10.484 5.957-9.832-5.574v-2.298l9.832 5.574L23 10.532V6.255l-.547-.319-10.442 5.936-9.327-5.276 9.327-5.298 7.663 4.362.673-.383v-.532L12.011 0 1 6.255v.681l11.01 6.255 9.811-5.595z" fill="#EE3D2C" fill-rule="nonzero"></path></svg>
|
After Width: | Height: | Size: 528 B |
25  litellm/proxy/_experimental/out/assets/logos/deepseek.svg  Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 25.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 292.6 215.3" style="enable-background:new 0 0 292.6 215.3;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill:#566AB2;}
|
||||||
|
</style>
|
||||||
|
<path class="st0" d="M191.3,123.7c-2.4,1-4.9,1.8-7.2,1.9c-3.6,0.2-7.6-1.3-9.7-3.1c-3.3-2.8-5.7-4.4-6.7-9.2
|
||||||
|
c-0.4-2.1-0.2-5.3,0.2-7.2c0.9-4-0.1-6.5-2.9-8.9c-2.3-1.9-5.2-2.4-8.4-2.4s-2.3-0.5-3.1-1c-1.3-0.7-2.4-2.3-1.4-4.4
|
||||||
|
c0.3-0.7,2-2.3,2.3-2.5c4.3-2.5,9.4-1.7,14,0.2c4.3,1.7,7.5,5,12.2,9.5c4.8,5.5,5.6,7,8.4,11.1c2.1,3.2,4.1,6.6,5.4,10.4
|
||||||
|
C195.2,120.5,194.2,122.4,191.3,123.7L191.3,123.7z M153.4,104.3c0-2.1,1.7-3.7,3.8-3.7s0.9,0.1,1.3,0.2c0.5,0.2,1,0.5,1.4,0.9
|
||||||
|
c0.7,0.7,1.1,1.6,1.1,2.6c0,2.1-1.7,3.8-3.8,3.8s-3.7-1.7-3.7-3.8H153.4z M141.2,182.8c-25.5-20-37.8-26.6-42.9-26.3
|
||||||
|
c-4.8,0.3-3.9,5.7-2.8,9.3c1.1,3.5,2.5,5.9,4.5,9c1.4,2,2.3,5.1-1.4,7.3c-8.2,5.1-22.5-1.7-23.1-2c-16.6-9.8-30.5-22.7-40.2-40.3
|
||||||
|
c-9.5-17-14.9-35.2-15.8-54.6c-0.2-4.7,1.1-6.4,5.8-7.2c6.2-1.1,12.5-1.4,18.7-0.5c26,3.8,48.1,15.4,66.7,33.8
|
||||||
|
c10.6,10.5,18.6,23,26.8,35.2c8.8,13,18.2,25.4,30.2,35.5c4.3,3.6,7.6,6.3,10.9,8.2c-9.8,1.1-26.1,1.3-37.2-7.5L141.2,182.8z
|
||||||
|
M289.5,18c-3.1-1.5-4.4,1.4-6.3,2.8c-0.6,0.5-1.1,1.1-1.7,1.7c-4.5,4.8-9.8,8-16.8,7.6c-10.1-0.6-18.7,2.6-26.4,10.4
|
||||||
|
c-1.6-9.5-7-15.2-15.2-18.9c-4.3-1.9-8.6-3.8-11.6-7.9c-2.1-2.9-2.7-6.2-3.7-9.4c-0.7-2-1.3-3.9-3.6-4.3c-2.4-0.4-3.4,1.7-4.3,3.4
|
||||||
|
c-3.8,7-5.3,14.6-5.2,22.4c0.3,17.5,7.7,31.5,22.4,41.4c1.7,1.1,2.1,2.3,1.6,3.9c-1,3.4-2.2,6.7-3.3,10.1c-0.7,2.2-1.7,2.7-4,1.7
|
||||||
|
c-8.1-3.4-15-8.4-21.2-14.4c-10.4-10.1-19.9-21.2-31.6-30c-2.8-2.1-5.5-4-8.4-5.7c-12-11.7,1.6-21.3,4.7-22.4
|
||||||
|
c3.3-1.2,1.2-5.3-9.5-5.2c-10.6,0-20.3,3.6-32.8,8.4c-1.8,0.7-3.7,1.2-5.7,1.7c-11.3-2.1-22.9-2.6-35.1-1.2
|
||||||
|
c-23,2.5-41.4,13.4-54.8,32C1,68.3-2.8,93.6,1.9,120c4.9,27.8,19.1,50.9,41,68.9c22.6,18.7,48.7,27.8,78.5,26.1
|
||||||
|
c18.1-1,38.2-3.5,60.9-22.7c5.7,2.8,11.7,4,21.7,4.8c7.7,0.7,15.1-0.4,20.8-1.5c9-1.9,8.4-10.2,5.1-11.7
|
||||||
|
c-26.3-12.3-20.5-7.3-25.7-11.3c13.3-15.8,33.5-32.2,41.3-85.4c0.6-4.2,0.1-6.9,0-10.3c0-2.1,0.4-2.9,2.8-3.1
|
||||||
|
c6.6-0.8,13-2.6,18.8-5.8c17-9.3,23.9-24.6,25.5-42.9c0.2-2.8,0-5.7-3-7.2L289.5,18z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 2.3 KiB |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Fireworks</title><path clip-rule="evenodd" d="M14.8 5l-2.801 6.795L9.195 5H7.397l3.072 7.428a1.64 1.64 0 003.038.002L16.598 5H14.8zm1.196 10.352l5.124-5.244-.699-1.669-5.596 5.739a1.664 1.664 0 00-.343 1.807 1.642 1.642 0 001.516 1.012L16 17l8-.02-.699-1.669-7.303.041h-.002zM2.88 10.104l.699-1.669 5.596 5.739c.468.479.603 1.189.343 1.807a1.643 1.643 0 01-1.516 1.012l-8-.018-.002.002.699-1.669 7.303.042-5.122-5.246z" fill="#5019C5" fill-rule="evenodd"></path></svg>
|
After Width: | Height: | Size: 592 B |
2  litellm/proxy/_experimental/out/assets/logos/google.svg  Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="none"><path fill="#4285F4" d="M14.9 8.161c0-.476-.039-.954-.121-1.422h-6.64v2.695h3.802a3.24 3.24 0 01-1.407 2.127v1.75h2.269c1.332-1.22 2.097-3.02 2.097-5.15z"/><path fill="#34A853" d="M8.14 15c1.898 0 3.499-.62 4.665-1.69l-2.268-1.749c-.631.427-1.446.669-2.395.669-1.836 0-3.393-1.232-3.952-2.888H1.85v1.803A7.044 7.044 0 008.14 15z"/><path fill="#FBBC04" d="M4.187 9.342a4.17 4.17 0 010-2.68V4.859H1.849a6.97 6.97 0 000 6.286l2.338-1.803z"/><path fill="#EA4335" d="M8.14 3.77a3.837 3.837 0 012.7 1.05l2.01-1.999a6.786 6.786 0 00-4.71-1.82 7.042 7.042 0 00-6.29 3.858L4.186 6.66c.556-1.658 2.116-2.89 3.952-2.89z"/></svg>
|
After Width: | Height: | Size: 728 B |
3  litellm/proxy/_experimental/out/assets/logos/groq.svg  Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 26.3 26.3"><defs><style>.cls-1{fill:#f05237;}.cls-2{fill:#fff;}</style></defs><g id="Layer_2" data-name="Layer 2"><g id="Content"><circle class="cls-1" cx="13.15" cy="13.15" r="13.15"/><path class="cls-2" d="M13.17,6.88a4.43,4.43,0,0,0,0,8.85h1.45V14.07H13.17a2.77,2.77,0,1,1,2.77-2.76v4.07a2.74,2.74,0,0,1-4.67,2L10.1,18.51a4.37,4.37,0,0,0,3.07,1.29h.06a4.42,4.42,0,0,0,4.36-4.4V11.2a4.43,4.43,0,0,0-4.42-4.32"/></g></g></svg>
|
After Width: | Height: | Size: 619 B |
After Width: | Height: | Size: 7.2 KiB |
1  litellm/proxy/_experimental/out/assets/logos/mistral.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Mistral</title><path d="M3.428 3.4h3.429v3.428H3.428V3.4zm13.714 0h3.43v3.428h-3.43V3.4z" fill="gold"></path><path d="M3.428 6.828h6.857v3.429H3.429V6.828zm10.286 0h6.857v3.429h-6.857V6.828z" fill="#FFAF00"></path><path d="M3.428 10.258h17.144v3.428H3.428v-3.428z" fill="#FF8205"></path><path d="M3.428 13.686h3.429v3.428H3.428v-3.428zm6.858 0h3.429v3.428h-3.429v-3.428zm6.856 0h3.43v3.428h-3.43v-3.428z" fill="#FA500F"></path><path d="M0 17.114h10.286v3.429H0v-3.429zm13.714 0H24v3.429H13.714v-3.429z" fill="#E10500"></path></svg>
|
After Width: | Height: | Size: 655 B |
7  litellm/proxy/_experimental/out/assets/logos/ollama.svg  Normal file
After Width: | Height: | Size: 8.4 KiB |
|
@ -0,0 +1,5 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg fill="#000000" viewBox="-2 -2 28 28" role="img" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<circle cx="12" cy="12" r="14" fill="white" />
|
||||||
|
<path d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.6 KiB |
39  litellm/proxy/_experimental/out/assets/logos/openrouter.svg  Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 300 300">
|
||||||
|
<!-- Generator: Adobe Illustrator 29.2.1, SVG Export Plug-In . SVG Version: 2.1.0 Build 116) -->
|
||||||
|
<defs>
|
||||||
|
<style>
|
||||||
|
.st0 {
|
||||||
|
fill: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st1 {
|
||||||
|
stroke-width: 52.7px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st1, .st2 {
|
||||||
|
stroke: #000;
|
||||||
|
stroke-miterlimit: 2.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st2 {
|
||||||
|
stroke-width: .6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st3 {
|
||||||
|
clip-path: url(#clippath);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<clipPath id="clippath">
|
||||||
|
<rect class="st0" width="300" height="300"/>
|
||||||
|
</clipPath>
|
||||||
|
</defs>
|
||||||
|
<g class="st3">
|
||||||
|
<g>
|
||||||
|
<path class="st1" d="M1.8,145.9c8.8,0,42.8-7.6,60.4-17.5s17.6-10,53.9-35.7c46-32.6,78.5-21.7,131.8-21.7"/>
|
||||||
|
<path class="st2" d="M299.4,71.2l-90.1,52V19.2l90.1,52Z"/>
|
||||||
|
<path class="st1" d="M0,145.9c8.8,0,42.8,7.6,60.4,17.5s17.6,10,53.9,35.7c46,32.6,78.5,21.7,131.8,21.7"/>
|
||||||
|
<path class="st2" d="M297.7,220.6l-90.1-52v104l90.1-52Z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.1 KiB |
|
@ -0,0 +1,16 @@
|
||||||
|
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.1.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 48 48" style="enable-background:new 0 0 48 48;" xml:space="preserve">
|
||||||
|
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="10.5862" y1="1.61" x2="36.0543" y2="44.1206">
|
||||||
|
<stop offset="0.002" style="stop-color:#9C55D4"/>
|
||||||
|
<stop offset="0.003" style="stop-color:#20808D"/>
|
||||||
|
<stop offset="0.3731" style="stop-color:#218F9B"/>
|
||||||
|
<stop offset="1" style="stop-color:#22B1BC"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path style="fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_1_);" d="M11.469,4l11.39,10.494v-0.002V4.024h2.217v10.517
|
||||||
|
L36.518,4v11.965h4.697v17.258h-4.683v10.654L25.077,33.813v10.18h-2.217V33.979L11.482,44V33.224H6.785V15.965h4.685V4z
|
||||||
|
M21.188,18.155H9.002v12.878h2.477v-4.062L21.188,18.155z M13.699,27.943v11.17l9.16-8.068V19.623L13.699,27.943z M25.141,30.938
|
||||||
|
V19.612l9.163,8.321v5.291h0.012v5.775L25.141,30.938z M36.532,31.033h2.466V18.155H26.903l9.629,8.725V31.033z M34.301,15.965
|
||||||
|
V9.038l-7.519,6.927H34.301z M21.205,15.965h-7.519V9.038L21.205,15.965z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.2 KiB |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>SambaNova</title><path d="M23 23h-1.223V8.028c0-3.118-2.568-5.806-5.744-5.806H8.027c-3.176 0-5.744 2.565-5.744 5.686 0 3.119 2.568 5.684 5.744 5.684h.794c1.346 0 2.445 1.1 2.445 2.444 0 1.346-1.1 2.446-2.445 2.446H1v-1.223h7.761c.671 0 1.223-.551 1.223-1.16 0-.67-.552-1.16-1.223-1.16h-.794C4.177 14.872 1 11.756 1 7.909 1 4.058 4.176 1 8.027 1h8.066C19.88 1 23 4.239 23 8.028V23z" fill="#EE7624"></path><path d="M8.884 12.672c1.71.06 3.361 1.588 3.361 3.422 0 1.833-1.528 3.421-3.421 3.421H1v1.223h7.761c2.568 0 4.705-2.077 4.705-4.644 0-.672-.123-1.283-.43-1.894-.245-.551-.67-1.1-1.099-1.528-.489-.429-1.039-.734-1.65-.977-.525-.175-1.048-.193-1.594-.212-.218-.008-.441-.016-.669-.034-.428 0-1.406-.245-1.956-.61a3.369 3.369 0 01-1.223-1.406c-.183-.489-.305-.977-.305-1.528A3.417 3.417 0 017.96 4.482h8.066c1.895 0 3.422 1.65 3.422 3.483v15.032h1.223V8.027c0-2.568-2.077-4.768-4.645-4.768h-8c-2.568 0-4.705 2.077-4.705 4.646 0 .67.123 1.282.43 1.894a4.45 4.45 0 001.099 1.528c.429.428 1.039.734 1.588.976.306.123.611.183.976.246.857.06 1.406.123 1.466.123h.003z" fill="#EE7624"></path><path d="M1 23h7.761v-.003c3.85 0 7.03-3.116 7.09-7.026 0-3.79-3.117-6.906-6.967-6.906H8.09c-.672 0-1.222-.552-1.222-1.16 0-.608.487-1.16 1.159-1.16h8.069c.608 0 1.159.611 1.159 1.283v14.97h1.223V8.024c0-1.345-1.1-2.505-2.445-2.505H7.967a2.451 2.451 0 00-2.445 2.445 2.45 2.45 0 002.445 2.445h.794c3.176 0 5.744 2.568 5.744 5.684s-2.568 5.684-5.744 5.684H1V23z" fill="#EE7624"></path></svg>
|
After Width: | Height: | Size: 1.6 KiB |
14  litellm/proxy/_experimental/out/assets/logos/togetherai.svg  Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
<svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<g clip-path="url(#clip0_542_18748)">
|
||||||
|
<rect width="32" height="32" rx="5.64706" fill="#F1EFED"/>
|
||||||
|
<circle cx="22.8233" cy="9.64706" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="22.8233" cy="22.8238" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="9.64706" cy="22.8238" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="9.64706" cy="9.64706" r="5.64706" fill="#0F6FFF"/>
|
||||||
|
</g>
|
||||||
|
<defs>
|
||||||
|
<clipPath id="clip0_542_18748">
|
||||||
|
<rect width="32" height="32" fill="white"/>
|
||||||
|
</clipPath>
|
||||||
|
</defs>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 560 B |
28  litellm/proxy/_experimental/out/assets/logos/xai.svg  Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1000 1000">
|
||||||
|
<defs>
|
||||||
|
<style>
|
||||||
|
.cls-1 {
|
||||||
|
fill: #000;
|
||||||
|
}
|
||||||
|
polygon {
|
||||||
|
fill: #fff;
|
||||||
|
}
|
||||||
|
@media ( prefers-color-scheme: dark ) {
|
||||||
|
.cls-1 {
|
||||||
|
fill: #fff;
|
||||||
|
}
|
||||||
|
polygon {
|
||||||
|
fill: #000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</defs>
|
||||||
|
<rect class="cls-1" width="1000" height="1000"/>
|
||||||
|
<g>
|
||||||
|
<polygon points="226.83 411.15 501.31 803.15 623.31 803.15 348.82 411.15 226.83 411.15" />
|
||||||
|
<polygon points="348.72 628.87 226.69 803.15 348.77 803.15 409.76 716.05 348.72 628.87" />
|
||||||
|
<polygon points="651.23 196.85 440.28 498.12 501.32 585.29 773.31 196.85 651.23 196.85" />
|
||||||
|
<polygon points="673.31 383.25 673.31 803.15 773.31 803.15 773.31 240.44 673.31 383.25" />
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 937 B |
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[14164,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-0222ddf4d701e0b4.js\",\"250\",\"static/chunks/250-a75ee9d79f1140b0.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-75d771fb848b47a8.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"9yIyUkG6nV2cO0gn7kJ-Q\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[14164,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-0222ddf4d701e0b4.js","250","static/chunks/250-a75ee9d79f1140b0.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-75d771fb848b47a8.js"],"default",1]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["9yIyUkG6nV2cO0gn7kJ-Q",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-a75ee9d79f1140b0.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["9yIyUkG6nV2cO0gn7kJ-Q",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null