Merge branch 'BerriAI:main' into main

sajda 2025-04-02 19:56:53 +05:30 committed by GitHub
commit 75f41a2d64
134 changed files with 3935 additions and 1451 deletions


@@ -1450,7 +1450,7 @@ jobs:
command: |
pwd
ls
- python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
+ python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/spend_tracking_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
no_output_timeout: 120m
# Store test results
@@ -1743,6 +1743,96 @@ jobs:
# Store test results
- store_test_results:
path: test-results
proxy_spend_accuracy_tests:
machine:
image: ubuntu-2204:2023.10.1
resource_class: xlarge
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
- run:
name: Install Python 3.9
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda init bash
source ~/.bashrc
conda create -n myenv python=3.9 -y
conda activate myenv
python --version
- run:
name: Install Dependencies
command: |
pip install "pytest==7.3.1"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
- run:
name: Build Docker image
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
- run:
name: Run Docker container
# intentionally give bad redis credentials here
# the OTEL test - should get this as a trace
command: |
docker run -d \
-p 4000:4000 \
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e REDIS_HOST=$REDIS_HOST \
-e REDIS_PASSWORD=$REDIS_PASSWORD \
-e REDIS_PORT=$REDIS_PORT \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e USE_DDTRACE=True \
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/spend_tracking_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
- run:
name: Install curl and dockerize
command: |
sudo apt-get update
sudo apt-get install -y curl
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
- run:
name: Start outputting logs
command: docker logs -f my-app
background: true
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 5m
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/spend_tracking_tests -x --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
# Clean up first container
- run:
name: Stop and remove first container
command: |
docker stop my-app
docker rm my-app
proxy_multi_instance_tests:
machine:
@@ -2553,6 +2643,12 @@ workflows:
only:
- main
- /litellm_.*/
- proxy_spend_accuracy_tests:
filters:
branches:
only:
- main
- /litellm_.*/
- proxy_multi_instance_tests:
filters:
branches:
@@ -2714,6 +2810,7 @@ workflows:
- installing_litellm_on_python
- installing_litellm_on_python_3_13
- proxy_logging_guardrails_model_info_tests
- proxy_spend_accuracy_tests
- proxy_multi_instance_tests
- proxy_store_model_in_db_tests
- proxy_build_from_pip_tests


@@ -24,10 +24,10 @@ jobs:
run: |
poetry install --with dev
- - name: Run Black formatting check
+ - name: Run Black formatting
run: |
cd litellm
- poetry run black . --check
+ poetry run black .
cd ..
- name: Run Ruff linting


@@ -1,2 +1,11 @@
python3 -m build
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
Note: You might need to make a MANIFEST.ini file at the root for the build process in case it fails
Place this in MANIFEST.ini
recursive-exclude venv *
recursive-exclude myenv *
recursive-exclude py313_env *
recursive-exclude **/.venv *


@@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
# /v1/messages [BETA]
- LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
- This currently just supports the Anthropic API.
+ Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
@@ -21,9 +22,61 @@ Planned improvement:
- Bedrock Anthropic support
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
<Tabs>
<TabItem label="PROXY" value="proxy">
1. Setup config.yaml
@@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
3. Test it!
- ```bash
+ <Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
@@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
"messages": [
{
"role": "user",
- "content": [
-   {
-     "type": "text",
-     "text": "List 5 important events in the XIX century"
-   }
- ]
+ "content": "Hello, can you tell me a short joke?"
}
],
- "max_tokens": 4096
+ "max_tokens": 100
}'
```
</TabItem>
<TabItem value="sdk" label="SDK">
```python
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
import asyncio
import os
# set env
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
# Call the handler
async def call():
response = await anthropic_messages(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",
max_tokens=100,
)
asyncio.run(call())
```
</TabItem>
</Tabs>
## Request Format
---
Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**
#### Example request body
```json
{
"model": "claude-3-7-sonnet-20250219",
"max_tokens": 1024,
"messages": [
{
"role": "user",
"content": "Hello, world"
}
]
}
```
#### Required Fields
- **model** (string):
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
The maximum number of tokens to generate before stopping.
_Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
An ordered list of conversational turns.
Each message object must include:
- **role** (enum: `"user"` or `"assistant"`):
Specifies the speaker of the message.
- **content** (string or array of content blocks):
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
_Example equivalence:_
```json
{"role": "user", "content": "Hello, Claude"}
```
is equivalent to:
```json
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
```
#### Optional Fields
- **metadata** (object):
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
Indicates whether to stream the response using server-sent events.
- **system** (string or array):
A system prompt providing context or specific instructions to the model.
- **temperature** (number):
Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
Configuration for enabling extended thinking. If enabled, it includes:
- **budget_tokens** (integer):
Minimum of 1024 tokens (and less than `max_tokens`).
- **type** (enum):
E.g., `"enabled"`.
- **tool_choice** (object):
Instructs how the model should utilize any provided tools.
- **tools** (array of objects):
Definitions for tools available to the model. Each tool includes:
- **name** (string):
The tool's name.
- **description** (string):
A detailed description of the tool.
- **input_schema** (object):
A JSON schema describing the expected input format for the tool.
- **top_k** (integer):
Limits sampling to the top K options.
- **top_p** (number):
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
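For illustration only, here is a minimal, non-authoritative sketch that combines the required fields with a few of the optional fields listed above, using the `litellm.anthropic.messages.acreate` interface shown earlier (the model name and all values are placeholders, not recommendations):

```python
import asyncio
import litellm

async def main():
    # Sketch: required fields plus a few optional ones from the list above.
    response = await litellm.anthropic.messages.acreate(
        model="anthropic/claude-3-haiku-20240307",
        max_tokens=256,
        messages=[{"role": "user", "content": "Give me one fun fact about the 19th century."}],
        system="You are a concise assistant.",   # optional system prompt
        temperature=0.5,                         # 0 < temperature < 1
        stop_sequences=["\n\nHuman:"],           # optional custom stop sequences
        metadata={"user_id": "user-123"},        # opaque identifier for the request
    )
    print(response)

asyncio.run(main())
```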
## Response Format
---
Responses will be in the Anthropic messages API format.
#### Example Response
```json
{
"content": [
{
"text": "Hi! My name is Claude.",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Response fields
- **content** (array of objects):
Contains the generated content blocks from the model. Each block includes:
- **type** (string):
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
- **text** (string):
The generated text from the model.
_Note: Maximum length is 5,000,000 characters._
- **citations** (array of objects or `null`):
Optional field providing citation details. Each citation includes:
- **cited_text** (string):
The excerpt being cited.
- **document_index** (integer):
An index referencing the cited document.
- **document_title** (string or `null`):
The title of the cited document.
- **start_char_index** (integer):
The starting character index for the citation.
- **end_char_index** (integer):
The ending character index for the citation.
- **type** (string):
Typically `"char_location"`.
- **id** (string):
A unique identifier for the response message.
_Note: The format and length of IDs may change over time._
- **model** (string):
Specifies the model that generated the response.
- **role** (string):
Indicates the role of the generated message. For responses, this is always `"assistant"`.
- **stop_reason** (string):
Explains why the model stopped generating text. Possible values include:
- `"end_turn"`: The model reached a natural stopping point.
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
- `"stop_sequence"`: A custom stop sequence was encountered.
- `"tool_use"`: The model invoked one or more tools.
- **stop_sequence** (string or `null`):
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
- **type** (string):
Denotes the type of response object, which is always `"message"`.
- **usage** (object):
Provides details on token usage for billing and rate limiting. This includes:
- **input_tokens** (integer):
Total number of input tokens processed.
- **output_tokens** (integer):
Total number of output tokens generated.
- **cache_creation_input_tokens** (integer or `null`):
Number of tokens used to create a cache entry.
- **cache_read_input_tokens** (integer or `null`):
Number of tokens read from the cache.
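As a rough illustration of how these fields might be consumed, the sketch below treats the response as a plain dict shaped like the example JSON above and inspects `stop_reason` and `usage` (the field names come from the list above; the dict-style access is an assumption for the example, not part of the spec):

```python
# Illustrative helper: summarize a /v1/messages response dict.
def summarize_response(response: dict) -> str:
    # Collect the generated text blocks.
    text = " ".join(
        block["text"]
        for block in response.get("content", [])
        if block.get("type") == "text"
    )

    # Explain why generation stopped, per the stop_reason values above.
    stop_reason = response.get("stop_reason")
    if stop_reason == "max_tokens":
        note = "truncated at max_tokens"
    elif stop_reason == "tool_use":
        note = "model requested a tool call"
    elif stop_reason == "stop_sequence":
        note = f"hit stop sequence {response.get('stop_sequence')!r}"
    else:
        note = "stopped naturally (end_turn)"

    usage = response.get("usage", {})
    return (
        f"{text}\n[{note}; input_tokens={usage.get('input_tokens')}, "
        f"output_tokens={usage.get('output_tokens')}]"
    )
```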


@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
- [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
+ [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
:::info


@@ -1,3 +1,5 @@
import Image from '@theme/IdealImage';
# Enterprise
For companies that need SSO, user management and professional support for LiteLLM Proxy
@@ -7,6 +9,8 @@ Get free 7-day trial key [here](https://www.litellm.ai/#trial)
Includes all enterprise features.
<Image img={require('../img/enterprise_vs_oss.png')} />
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)


@@ -1035,8 +1035,10 @@ response = completion(
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
- "type": "image_url",
- "image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
},
],
}
@@ -1081,8 +1083,10 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
- "type": "image_url",
- "image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
}
}
]


@@ -1168,14 +1168,22 @@ os.environ["AWS_REGION_NAME"] = ""
# pdf url
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
# Download the file
response = requests.get(url)
file_data = response.content
encoded_file = base64.b64encode(file_data).decode("utf-8")
# model
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
image_content = [
{"type": "text", "text": "What's this file about?"},
{
- "type": "image_url",
- "image_url": image_url, # OR {"url": image_url}
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
},
]
@@ -1221,8 +1229,10 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
"messages": [
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
{
- "type": "image_url",
- "image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
}
]
}'


@@ -365,7 +365,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
</Tabs>
## Specifying Safety Settings
- In certain use-cases you may need to make calls to the models and pass [safety settigns](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
+ In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
```python
response = completion(


@@ -82,7 +82,7 @@ from litellm import completion
os.environ["XAI_API_KEY"] = "your-api-key"
response = completion(
- model="xai/grok-2-latest",
+ model="xai/grok-2-vision-latest",
messages=[
{
"role": "user",


@@ -23,6 +23,12 @@ In the newly created guard's page, you can find a reference to the prompt policy
You can decide which detections will be enabled, and set the threshold for each detection.
:::info
When using LiteLLM with virtual keys, key-specific policies can be set directly in Aim's guards page by specifying the virtual key alias when creating the guard.
Only the aliases of your virtual keys (and not the actual key secrets) will be sent to Aim.
:::
### 3. Add Aim Guardrail on your LiteLLM config.yaml
Define your guardrails under the `guardrails` section


@@ -17,6 +17,14 @@ model_list:
api_key: os.environ/OPENAI_API_KEY
guardrails:
- guardrail_name: general-guard
litellm_params:
guardrail: aim
mode: [pre_call, post_call]
api_key: os.environ/AIM_API_KEY
api_base: os.environ/AIM_API_BASE
default_on: true # Optional
- guardrail_name: "aporia-pre-guard" - guardrail_name: "aporia-pre-guard"
litellm_params: litellm_params:
guardrail: aporia # supported values: "aporia", "lakera" guardrail: aporia # supported values: "aporia", "lakera"
@ -45,6 +53,7 @@ guardrails:
- `pre_call` Run **before** LLM call, on **input** - `pre_call` Run **before** LLM call, on **input**
- `post_call` Run **after** LLM call, on **input & output** - `post_call` Run **after** LLM call, on **input & output**
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes - `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`
## 2. Start LiteLLM Gateway

Binary file not shown (new image, 61 KiB).


@@ -137,6 +137,7 @@ const sidebars = {
label: "[Beta] Guardrails",
items: [
"proxy/guardrails/quick_start",
...[
"proxy/guardrails/aim_security", "proxy/guardrails/aim_security",
"proxy/guardrails/aporia_api", "proxy/guardrails/aporia_api",
"proxy/guardrails/bedrock", "proxy/guardrails/bedrock",
@ -145,7 +146,8 @@ const sidebars = {
"proxy/guardrails/pii_masking_v2", "proxy/guardrails/pii_masking_v2",
"proxy/guardrails/secret_detection", "proxy/guardrails/secret_detection",
"proxy/guardrails/custom_guardrail", "proxy/guardrails/custom_guardrail",
"prompt_injection" "proxy/guardrails/prompt_injection",
].sort(),
], ],
}, },
{ {

Binary file not shown.


@@ -0,0 +1,4 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "failed_requests" INTEGER NOT NULL DEFAULT 0,
ADD COLUMN "successful_requests" INTEGER NOT NULL DEFAULT 0;

litellm-proxy-extras/poetry.lock (generated, new file)

@@ -0,0 +1,7 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
package = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "2cf39473e67ff0615f0a61c9d2ac9f02b38cc08cbb1bdb893d89bee002646623"


@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
- version = "0.1.1"
+ version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
- version = "0.1.1"
+ version = "0.1.2"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",


@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []


@@ -3,4 +3,4 @@ import importlib_metadata
try:
version = importlib_metadata.version("litellm")
except Exception:
- pass
+ version = "unknown"


@@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]


@@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API
Use this to call LLMs in Anthropic /messages Request/Response format
This is an __init__.py file to allow the following interface
- litellm.messages.acreate
- litellm.messages.create
"""
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
async def acreate(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
return await _async_anthropic_messages(
max_tokens=max_tokens,
messages=messages,
model=model,
metadata=metadata,
stop_sequences=stop_sequences,
stream=stream,
system=system,
temperature=temperature,
thinking=thinking,
tool_choice=tool_choice,
tools=tools,
top_k=top_k,
top_p=top_p,
**kwargs,
)
async def create(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
raise NotImplementedError("This function is not implemented")


@@ -0,0 +1,116 @@
## Use LLM API endpoints in Anthropic Interface
Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
1. Setup config.yaml
```yaml
model_list:
- model_name: anthropic-claude
litellm_params:
model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
"model": "anthropic-claude",
"messages": [
{
"role": "user",
"content": "Hello, can you tell me a short joke?"
}
],
"max_tokens": 100
}'
```


@@ -19,6 +19,7 @@ DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.


@@ -550,6 +550,7 @@ def completion_cost( # noqa: PLR0915
custom_pricing: Optional[bool] = None,
base_model: Optional[str] = None,
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.
@@ -602,7 +603,7 @@
completion_response=completion_response
)
rerank_billed_units: Optional[RerankBilledUnits] = None
- model = _select_model_name_for_cost_calc(
+ selected_model = _select_model_name_for_cost_calc(
model=model,
completion_response=completion_response,
custom_llm_provider=custom_llm_provider,
@@ -610,16 +611,24 @@
base_model=base_model,
)
- verbose_logger.info(f"selected model name for cost calculation: {model}")
+ potential_model_names = [selected_model]
if model is not None:
potential_model_names.append(model)
for idx, model in enumerate(potential_model_names):
try:
verbose_logger.info(
f"selected model name for cost calculation: {model}"
)
if completion_response is not None and (
isinstance(completion_response, BaseModel)
or isinstance(completion_response, dict)
): # tts returns a custom class
if isinstance(completion_response, dict):
- usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
-   "usage", {}
- )
+ usage_obj: Optional[
+   Union[dict, Usage]
+ ] = completion_response.get("usage", {})
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
@@ -638,16 +647,16 @@
_usage = usage_obj
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
- _usage = (
-   ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+ _usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
_usage
).model_dump()
- )
# get input/output tokens from completion_response
prompt_tokens = _usage.get("prompt_tokens", 0)
completion_tokens = _usage.get("completion_tokens", 0)
- cache_creation_input_tokens = _usage.get("cache_creation_input_tokens", 0)
+ cache_creation_input_tokens = _usage.get(
+   "cache_creation_input_tokens", 0
+ )
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
if (
"prompt_tokens_details" in _usage
@@ -655,7 +664,9 @@
and _usage["prompt_tokens_details"]
):
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
- cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
+ cache_read_input_tokens = prompt_tokens_details.get(
+   "cached_tokens", 0
+ )
total_time = getattr(completion_response, "_response_ms", 0)
@@ -703,7 +714,8 @@
if (
call_type == CallTypes.image_generation.value
or call_type == CallTypes.aimage_generation.value
- or call_type == PassthroughCallTypes.passthrough_image_generation.value
+ or call_type
+ == PassthroughCallTypes.passthrough_image_generation.value
):
### IMAGE GENERATION COST CALCULATION ###
if custom_llm_provider == "vertex_ai":
@@ -733,7 +745,8 @@
optional_params=optional_params,
)
elif (
- call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+ call_type == CallTypes.speech.value
+ or call_type == CallTypes.aspeech.value
):
prompt_characters = litellm.utils._count_characters(text=prompt)
elif (
@@ -744,7 +757,8 @@
completion_response, "duration", 0.0
)
elif (
- call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
+ call_type == CallTypes.rerank.value
+ or call_type == CallTypes.arerank.value
):
if completion_response is not None and isinstance(
completion_response, RerankResponse
@@ -773,7 +787,9 @@
# together ai prices based on size of llm
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
- model = get_model_params_and_category(model, call_type=CallTypes(call_type))
+ model = get_model_params_and_category(
+   model, call_type=CallTypes(call_type)
+ )
# replicate llms are calculate based on time for request running
# see https://replicate.com/pricing
@@ -788,14 +804,19 @@
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
- if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
+ if (
+   custom_llm_provider is not None
+   and custom_llm_provider == "vertex_ai"
+ ):
# Calculate the prompt characters + response characters
if len(messages) > 0:
prompt_string = litellm.utils.get_formatted_prompt(
data={"messages": messages}, call_type="completion"
)
- prompt_characters = litellm.utils._count_characters(text=prompt_string)
+ prompt_characters = litellm.utils._count_characters(
+   text=prompt_string
+ )
if completion_response is not None and isinstance(
completion_response, ModelResponse
):
@@ -823,19 +844,35 @@
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
usage_object=cost_per_token_usage_object,
- call_type=call_type,
+ call_type=cast(CallTypesLiteral, call_type),
audio_transcription_file_duration=audio_transcription_file_duration,
rerank_billed_units=rerank_billed_units,
)
- _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
- _final_cost += StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
+ _final_cost = (
+   prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+ )
+ _final_cost += (
+   StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
model=model,
response_object=completion_response,
standard_built_in_tools_params=standard_built_in_tools_params,
custom_llm_provider=custom_llm_provider,
)
+ )
return _final_cost
except Exception as e:
verbose_logger.debug(
"litellm.cost_calculator.py::completion_cost() - Error calculating cost for model={} - {}".format(
model, str(e)
)
)
if idx == len(potential_model_names) - 1:
raise e
raise Exception(
"Unable to calculat cost for received potential model names - {}".format(
potential_model_names
)
)
except Exception as e:
raise e
@@ -897,6 +934,7 @@ def response_cost_calculator(
custom_pricing: Optional[bool] = None,
prompt: str = "",
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Returns


@@ -290,6 +290,7 @@ class Logging(LiteLLMLoggingBaseClass):
"input": _input,
"litellm_params": litellm_params,
"applied_guardrails": applied_guardrails,
"model": model,
}
def process_dynamic_callbacks(self):
@@ -892,6 +893,7 @@
ResponseCompletedEvent,
],
cache_hit: Optional[bool] = None,
litellm_model_name: Optional[str] = None,
) -> Optional[float]:
"""
Calculate response cost using result + logging object variables.
@@ -917,7 +919,7 @@
try:
response_cost_calculator_kwargs = {
"response_object": result,
- "model": self.model,
+ "model": litellm_model_name or self.model,
"cache_hit": cache_hit,
"custom_llm_provider": self.model_call_details.get(
"custom_llm_provider", None
@@ -1009,6 +1011,10 @@
return False
return True
def _update_completion_start_time(self, completion_start_time: datetime.datetime):
self.completion_start_time = completion_start_time
self.model_call_details["completion_start_time"] = self.completion_start_time
def _success_handler_helper_fn(
self,
result=None,


@@ -22,6 +22,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionFileObject,
ChatCompletionFunctionMessage,
ChatCompletionImageObject,
ChatCompletionTextObject,
@@ -1455,6 +1456,25 @@ def anthropic_messages_pt( # noqa: PLR0915
user_content.append(_content_element)
elif m.get("type", "") == "document":
user_content.append(cast(AnthropicMessagesDocumentParam, m))
elif m.get("type", "") == "file":
file_message = cast(ChatCompletionFileObject, m)
file_data = file_message["file"].get("file_data")
if file_data:
image_chunk = convert_to_anthropic_image_obj(
openai_image_url=file_data,
format=file_message["file"].get("format"),
)
anthropic_document_param = (
AnthropicMessagesDocumentParam(
type="document",
source=AnthropicContentParamSource(
type="base64",
media_type=image_chunk["media_type"],
data=image_chunk["data"],
),
)
)
user_content.append(anthropic_document_param)
elif isinstance(user_message_types_block["content"], str):
_anthropic_content_text_element: AnthropicMessagesTextParam = {
"type": "text",
@@ -2885,6 +2905,11 @@ class BedrockConverseMessagesProcessor:
image_url=image_url, format=format
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = await BedrockConverseMessagesProcessor._async_process_file_message(
message=cast(ChatCompletionFileObject, element)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(
@@ -3054,6 +3079,45 @@
reasoning_content_blocks.append(bedrock_content_block)
return reasoning_content_blocks
@staticmethod
def _process_file_message(message: ChatCompletionFileObject) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
format = file_message.get("format")
return BedrockImageProcessor.process_image_sync(
image_url=cast(str, file_id or file_data), format=format
)
@staticmethod
async def _async_process_file_message(
message: ChatCompletionFileObject,
) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
format = file_message.get("format")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
return await BedrockImageProcessor.process_image_async(
image_url=cast(str, file_id or file_data), format=format
)
def _bedrock_converse_messages_pt( # noqa: PLR0915
messages: List,
@@ -3126,6 +3190,13 @@
format=format,
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = (
BedrockConverseMessagesProcessor._process_file_message(
message=cast(ChatCompletionFileObject, element)
)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(


@@ -1,5 +1,6 @@
import asyncio
import collections.abc
import datetime
import json
import threading
import time
@@ -1567,6 +1568,10 @@ class CustomStreamWrapper:
if response is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
## LOGGING
executor.submit(
self.run_success_logging_and_cache_storage,
@@ -1721,6 +1726,11 @@
if processed_chunk is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
choice = processed_chunk.choices[0]
if isinstance(choice, StreamingChoices):
self.response_uptil_now += choice.delta.get("content", "") or ""


@@ -18,8 +18,10 @@ from litellm.types.llms.anthropic import (
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionSystemMessage,
@@ -94,6 +96,7 @@ class AnthropicConfig(BaseConfig):
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]
if "claude-3-7-sonnet" in model:
@@ -141,15 +144,9 @@
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
- # Handle beta headers for Vertex AI
- # We allow prompt caching beta header for Vertex, but exclude other beta headers that might cause issues
+ # Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
- vertex_safe_betas = set()
+ pass
- # Allow prompt caching beta header for Vertex
- if "prompt-caching-2024-07-31" in betas:
-   vertex_safe_betas.add("prompt-caching-2024-07-31")
- if len(vertex_safe_betas) > 0:
-   headers["anthropic-beta"] = ",".join(vertex_safe_betas)
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
@@ -297,6 +294,21 @@
new_stop = new_v
return new_stop
@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
elif reasoning_effort == "high":
return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def map_openai_params(
self,
non_default_params: dict,
@@ -308,10 +320,6 @@
non_default_params=non_default_params
)
- ## handle thinking tokens
- self.update_optional_params_with_thinking_tokens(
-   non_default_params=non_default_params, optional_params=optional_params
- )
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
@@ -376,7 +384,15 @@
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params
def _create_json_tool_call_for_response_format(


@@ -6,7 +6,7 @@
"""
import json
- from typing import Any, AsyncIterator, Dict, Optional, Union, cast
+ from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
@client
async def anthropic_messages(
- api_key: str,
+ max_tokens: int,
+ messages: List[Dict],
model: str,
- stream: bool = False,
+ metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = None,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
- ) -> Union[Dict[str, Any], AsyncIterator]:
+ ) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
"""
@@ -129,10 +143,8 @@ async def anthropic_messages(
},
custom_llm_provider=_custom_llm_provider,
)
- litellm_logging_obj.model_call_details.update(kwargs)
# Prepare request body
- request_body = kwargs.copy()
+ request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()
@@ -140,10 +152,12 @@
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
litellm_logging_obj.model_call_details.update(request_body)
# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(
@@ -164,7 +178,7 @@
url=request_url,
headers=headers,
data=json.dumps(request_body),
- stream=stream,
+ stream=stream or False,
)
response.raise_for_status()


@@ -104,7 +104,10 @@ class BaseConfig(ABC):
return type_to_response_format_param(response_format=response_format)
def is_thinking_enabled(self, non_default_params: dict) -> bool:
- return non_default_params.get("thinking", {}).get("type", None) == "enabled"
+ return (
+   non_default_params.get("thinking", {}).get("type") == "enabled"
+   or non_default_params.get("reasoning_effort") is not None
+ )
def update_optional_params_with_thinking_tokens(
self, non_default_params: dict, optional_params: dict
@@ -116,9 +119,9 @@
if 'thinking' is enabled and 'max_tokens' is not specified, set 'max_tokens' to the thinking token budget + DEFAULT_MAX_TOKENS
"""
- is_thinking_enabled = self.is_thinking_enabled(non_default_params)
+ is_thinking_enabled = self.is_thinking_enabled(optional_params)
if is_thinking_enabled and "max_tokens" not in non_default_params:
- thinking_token_budget = cast(dict, non_default_params["thinking"]).get(
+ thinking_token_budget = cast(dict, optional_params["thinking"]).get(
"budget_tokens", None
)
if thinking_token_budget is not None:


@@ -17,6 +17,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
_bedrock_converse_messages_pt,
_bedrock_tools_pt,
)
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
@@ -128,6 +129,7 @@ class AmazonConverseConfig(BaseConfig):
"claude-3-7" in model
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
supported_params.append("thinking")
supported_params.append("reasoning_effort")
return supported_params
def map_tool_choice_values(
@@ -218,9 +220,7 @@
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
- self.update_optional_params_with_thinking_tokens(
-   non_default_params=non_default_params, optional_params=optional_params
- )
for param, value in non_default_params.items():
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]
@ -297,6 +297,14 @@ class AmazonConverseConfig(BaseConfig):
optional_params["tool_choice"] = _tool_choice_value optional_params["tool_choice"] = _tool_choice_value
if param == "thinking": if param == "thinking":
optional_params["thinking"] = value optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params return optional_params

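A hedged usage sketch: after this change, `reasoning_effort` should be accepted for Bedrock Claude 3.7 Converse models and converted into a `thinking` block through Anthropic's mapping. The model id and effort level below are illustrative assumptions, not values taken from the diff.

import litellm

# Assumes AWS credentials and region are configured in the environment.
response = litellm.completion(
    model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",  # example id
    messages=[{"role": "user", "content": "Explain the CAP theorem briefly."}],
    reasoning_effort="low",  # mapped to a `thinking` config by AmazonConverseConfig
)
print(response.choices[0].message.content)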

@@ -12,6 +12,7 @@ import httpx

 from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.types.llms.openrouter import OpenRouterErrorMessage
 from litellm.types.utils import ModelResponse, ModelResponseStream

 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -71,6 +72,24 @@ class OpenrouterConfig(OpenAIGPTConfig):
 class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
     def chunk_parser(self, chunk: dict) -> ModelResponseStream:
         try:
+            ## HANDLE ERROR IN CHUNK ##
+            if "error" in chunk:
+                error_chunk = chunk["error"]
+                error_message = OpenRouterErrorMessage(
+                    message="Message: {}, Metadata: {}, User ID: {}".format(
+                        error_chunk["message"],
+                        error_chunk.get("metadata", {}),
+                        error_chunk.get("user_id", ""),
+                    ),
+                    code=error_chunk["code"],
+                    metadata=error_chunk.get("metadata", {}),
+                )
+                raise OpenRouterException(
+                    message=error_message["message"],
+                    status_code=error_message["code"],
+                    headers=error_message["metadata"].get("headers", {}),
+                )
+
             new_choices = []
             for choice in chunk["choices"]:
                 choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")

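For reference, an error chunk in an OpenRouter stream roughly takes the shape below; the new guard surfaces it as an exception instead of failing later on the missing "choices" key. The payload here is a hypothetical example, and RuntimeError stands in for the provider-specific exception used in the real code.

# Hypothetical mid-stream error chunk.
error_chunk = {
    "error": {
        "message": "Provider returned error",
        "code": 429,
        "metadata": {"headers": {"retry-after": "30"}},
    }
}

# Simplified version of the new check in chunk_parser:
if "error" in error_chunk:
    err = error_chunk["error"]
    raise RuntimeError(
        f"OpenRouter stream error {err['code']}: {err['message']}"
    )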

@@ -127,12 +127,17 @@ class AWSEventStreamDecoder:
         async for chunk in iterator:
             event_stream_buffer.add_data(chunk)
             for event in event_stream_buffer:
+                try:
                     message = self._parse_message_from_event(event)
                     if message:
-                        verbose_logger.debug("sagemaker parsed chunk bytes %s", message)
+                        verbose_logger.debug(
+                            "sagemaker parsed chunk bytes %s", message
+                        )
                         # remove data: prefix and "\n\n" at the end
                         message = (
-                            litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
+                            litellm.CustomStreamWrapper._strip_sse_data_from_chunk(
+                                message
+                            )
                             or ""
                         )
                         message = message.replace("\n\n", "")
@@ -141,7 +146,6 @@ class AWSEventStreamDecoder:
                         accumulated_json += message

                         # Try to parse the accumulated JSON
-                        try:
                             _data = json.loads(accumulated_json)
                             if self.is_messages_api:
                                 yield self._chunk_parser_messages_api(chunk_data=_data)
@@ -152,6 +156,16 @@ class AWSEventStreamDecoder:
                         except json.JSONDecodeError:
                             # If it's not valid JSON yet, continue to the next event
                             continue
+                except UnicodeDecodeError as e:
+                    verbose_logger.warning(
+                        f"UnicodeDecodeError: {e}. Attempting to combine with next event."
+                    )
+                    continue
+                except Exception as e:
+                    verbose_logger.error(
+                        f"Error parsing message: {e}. Attempting to combine with next event."
+                    )
+                    continue
         # Handle any remaining data after the iterator is exhausted
         if accumulated_json:
@@ -167,6 +181,8 @@ class AWSEventStreamDecoder:
                     f"Warning: Unparseable JSON data remained: {accumulated_json}"
                 )
                 yield None
+            except Exception as e:
+                verbose_logger.error(f"Final error parsing accumulated JSON: {e}")

     def _parse_message_from_event(self, event) -> Optional[str]:
         response_dict = event.to_response_dict()

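The motivation for tolerating UnicodeDecodeError and partial payloads is that a multi-byte character or a JSON document can be split across two SageMaker event-stream chunks. A minimal standalone illustration of the same accumulate-and-retry pattern, independent of the AWS types:

import json

# Simulated stream where one JSON object is split across two chunks.
chunks = ['{"text": "hel', 'lo"}', '{"text": "world"}']

accumulated = ""
for chunk in chunks:
    accumulated += chunk
    try:
        parsed = json.loads(accumulated)
        print("parsed:", parsed)
        accumulated = ""  # reset once a full JSON object was decoded
    except json.JSONDecodeError:
        # Incomplete payload: keep accumulating and retry on the next chunk.
        continue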

@@ -4453,6 +4453,42 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
         "supports_tool_choice": true
     },
+    "gemini-2.5-pro-exp-03-25": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_audio_input": true,
+        "supports_video_input": true,
+        "supports_pdf_input": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
     "gemini-2.0-pro-exp-02-05": {
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
@@ -10189,6 +10225,22 @@
         "litellm_provider": "voyage",
         "mode": "rerank"
     },
+    "databricks/databricks-claude-3-7-sonnet": {
+        "max_tokens": 200000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000025,
+        "input_dbu_cost_per_token": 0.00003571,
+        "output_cost_per_token": 0.00017857,
+        "output_db_cost_per_token": 0.000214286,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
+        "supports_assistant_prefill": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "databricks/databricks-meta-llama-3-1-405b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
@@ -10217,7 +10269,7 @@
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
     },
-    "databricks/meta-llama-3.3-70b-instruct": {
+    "databricks/databricks-meta-llama-3-3-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,

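Once these entries are in the model cost map, the metadata can be read back through litellm's model-info helper; a quick sketch, where the provider argument and printed fields are assumptions based on the JSON above:

import litellm

info = litellm.get_model_info(
    model="gemini-2.5-pro-exp-03-25", custom_llm_provider="vertex_ai"
)
print(info["max_input_tokens"])  # expected: 1048576, per the new entry
print(info["supports_vision"])   # expected: True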
File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/1f6915676624c422.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[38411,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"274\",\"static/chunks/274-bddaf0cf6c91e72f.js\",\"250\",\"static/chunks/250-dfc03a6fb4f0d254.js\",\"699\",\"static/chunks/699-87224ecba28f1f48.js\",\"931\",\"static/chunks/app/page-0f46d4a8b9bdf1c0.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Yb50LG5p7c9QpG54GIoFV\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/1f6915676624c422.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1] 3:I[38411,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","274","static/chunks/274-bddaf0cf6c91e72f.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","931","static/chunks/app/page-0f46d4a8b9bdf1c0.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1] 3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-601568e45a5ffece.js","461","static/chunks/app/onboarding/page-5110f2c6a3c9a2f4.js"],"default",1] 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-dfc03a6fb4f0d254.js","461","static/chunks/app/onboarding/page-2bf7a26db5342dbf.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@@ -20,14 +20,20 @@ model_list:
     litellm_params:
       model: gemini/gemini-2.0-flash
       api_key: os.environ/GEMINI_API_KEY
+  - model_name: openrouter_model
+    litellm_params:
+      model: openrouter/openrouter_model
+      api_key: os.environ/OPENROUTER_API_KEY
+      api_base: http://0.0.0.0:8090

 litellm_settings:
   num_retries: 0
   callbacks: ["prometheus"]
   # json_logs: true

-# router_settings:
-#   routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
-#   redis_host: os.environ/REDIS_HOST
-#   redis_password: os.environ/REDIS_PASSWORD
-#   redis_port: os.environ/REDIS_PORT
+router_settings:
+  routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT

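Equivalent programmatic setup, for cases where the proxy config file is not used: the router settings enabled above map onto litellm.Router arguments. The model entry and Redis values below are placeholders, not values from this config.

import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gemini-flash",
            "litellm_params": {
                "model": "gemini/gemini-2.0-flash",
                "api_key": os.environ.get("GEMINI_API_KEY"),
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",
    redis_host=os.environ.get("REDIS_HOST"),
    redis_password=os.environ.get("REDIS_PASSWORD"),
    redis_port=int(os.environ.get("REDIS_PORT", "6379")),
)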

@@ -432,6 +432,7 @@ class LiteLLMRoutes(enum.Enum):
         "/model/new",
         "/model/update",
         "/model/delete",
+        "/user/daily/activity",
     ]  # routes that manage their own allowed/disallowed logic

     ## Org Admin Routes ##
@@ -2736,6 +2737,8 @@ class DailyUserSpendTransaction(TypedDict):
     completion_tokens: int
     spend: float
     api_requests: int
+    successful_requests: int
+    failed_requests: int


 class DBSpendUpdateTransactions(TypedDict):
@@ -2749,3 +2752,9 @@ class DBSpendUpdateTransactions(TypedDict):
     team_list_transactions: Optional[Dict[str, float]]
     team_member_list_transactions: Optional[Dict[str, float]]
     org_list_transactions: Optional[Dict[str, float]]
+
+
+class SpendUpdateQueueItem(TypedDict, total=False):
+    entity_type: Litellm_EntityType
+    entity_id: str
+    response_cost: Optional[float]

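A minimal sketch of how SpendUpdateQueueItem entries can be aggregated per entity before being flushed to the database. The queue classes added in this commit are not shown here, so this asyncio.Queue version is an illustrative stand-in rather than their actual implementation:

import asyncio
from collections import defaultdict

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()

    # Queue a few spend updates (dicts shaped like SpendUpdateQueueItem).
    await queue.put({"entity_type": "key", "entity_id": "hashed-token-1", "response_cost": 0.002})
    await queue.put({"entity_type": "key", "entity_id": "hashed-token-1", "response_cost": 0.003})
    await queue.put({"entity_type": "team", "entity_id": "team-a", "response_cost": 0.010})

    # Flush and aggregate: one summed cost per (entity_type, entity_id).
    aggregated: dict = defaultdict(float)
    while not queue.empty():
        item = queue.get_nowait()
        aggregated[(item["entity_type"], item["entity_id"])] += item["response_cost"] or 0.0

    print(dict(aggregated))

asyncio.run(main())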

@@ -1,53 +0,0 @@
"""
Checks for LiteLLM service account keys
"""
from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
async def service_account_checks(
valid_token: UserAPIKeyAuth, request_data: dict
) -> bool:
"""
If a virtual key is a service account, checks it's a valid service account
A token is a service account if it has a service_account_id in its metadata
Service Account Specific Checks:
- Check if required_params is set
"""
if check_if_token_is_service_account(valid_token) is not True:
return True
from litellm.proxy.proxy_server import general_settings
if "service_account_settings" in general_settings:
service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings:
_enforced_params = service_account_settings["enforced_params"]
for param in _enforced_params:
if param not in request_data:
raise ProxyException(
type=ProxyErrorTypes.bad_request_error.value,
code=400,
param=param,
message=f"BadRequest please pass param={param} in request body. This is a required param for service account",
)
return True


@@ -49,7 +49,6 @@ from litellm.proxy.auth.auth_utils import (
 from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler
 from litellm.proxy.auth.oauth2_check import check_oauth2_token
 from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
-from litellm.proxy.auth.service_account_checks import service_account_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.utils import PrismaClient, ProxyLogging
 from litellm.types.services import ServiceTypes
@@ -905,12 +904,6 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
             else:
                 _team_obj = None

-            # Check 7: Check if key is a service account key
-            await service_account_checks(
-                valid_token=valid_token,
-                request_data=request_data,
-            )
-
             user_api_key_cache.set_cache(
                 key=valid_token.team_id, value=_team_obj
             )  # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py


@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )


@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:

 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None

@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)

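The new storage format keeps the accepted keys alongside the parsed dict, so a value of the wrong shape in request.scope is ignored instead of being returned blindly. A standalone round-trip of the same idea, with a plain dict standing in for request.scope:

scope: dict = {}

parsed_body = {"model": "gpt-4o", "stream": True}

# _safe_set_request_parsed_body equivalent: store (accepted_keys, body) as a tuple.
scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)

# _safe_get_request_parsed_body equivalent: only rebuild when the tuple shape matches.
stored = scope.get("parsed_body")
if isinstance(stored, tuple):
    accepted_keys, body = stored
    print({key: body[key] for key in accepted_keys})  # {'model': 'gpt-4o', 'stream': True}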

@@ -10,7 +10,7 @@ import os
 import time
 import traceback
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
@@ -18,13 +18,19 @@ from litellm.caching import DualCache, RedisCache
 from litellm.constants import DB_SPEND_UPDATE_JOB_NAME
 from litellm.proxy._types import (
     DB_CONNECTION_ERROR_TYPES,
+    DailyUserSpendTransaction,
     DBSpendUpdateTransactions,
     Litellm_EntityType,
     LiteLLM_UserTable,
     SpendLogsPayload,
+    SpendUpdateQueueItem,
 )
-from litellm.proxy.db.pod_lock_manager import PodLockManager
-from litellm.proxy.db.redis_update_buffer import RedisUpdateBuffer
+from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
+    DailySpendUpdateQueue,
+)
+from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
+from litellm.proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer
+from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue

 if TYPE_CHECKING:
     from litellm.proxy.utils import PrismaClient, ProxyLogging
@@ -48,10 +54,12 @@ class DBSpendUpdateWriter:
         self.redis_cache = redis_cache
         self.redis_update_buffer = RedisUpdateBuffer(redis_cache=self.redis_cache)
         self.pod_lock_manager = PodLockManager(cronjob_id=DB_SPEND_UPDATE_JOB_NAME)
+        self.spend_update_queue = SpendUpdateQueue()
+        self.daily_spend_update_queue = DailySpendUpdateQueue()

-    @staticmethod
     async def update_database(
+        self,
         # LiteLLM management object fields
         token: Optional[str],
         user_id: Optional[str],
         end_user_id: Optional[str],
@ -84,7 +92,7 @@ class DBSpendUpdateWriter:
hashed_token = token hashed_token = token
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_user_db( self._update_user_db(
response_cost=response_cost, response_cost=response_cost,
user_id=user_id, user_id=user_id,
prisma_client=prisma_client, prisma_client=prisma_client,
@ -94,14 +102,14 @@ class DBSpendUpdateWriter:
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_key_db( self._update_key_db(
response_cost=response_cost, response_cost=response_cost,
hashed_token=hashed_token, hashed_token=hashed_token,
prisma_client=prisma_client, prisma_client=prisma_client,
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_team_db( self._update_team_db(
response_cost=response_cost, response_cost=response_cost,
team_id=team_id, team_id=team_id,
user_id=user_id, user_id=user_id,
@ -109,14 +117,14 @@ class DBSpendUpdateWriter:
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_org_db( self._update_org_db(
response_cost=response_cost, response_cost=response_cost,
org_id=org_id, org_id=org_id,
prisma_client=prisma_client, prisma_client=prisma_client,
) )
) )
if disable_spend_logs is False: if disable_spend_logs is False:
await DBSpendUpdateWriter._insert_spend_log_to_db( await self._insert_spend_log_to_db(
kwargs=kwargs, kwargs=kwargs,
completion_response=completion_response, completion_response=completion_response,
start_time=start_time, start_time=start_time,
@ -135,56 +143,8 @@ class DBSpendUpdateWriter:
f"Error updating Prisma database: {traceback.format_exc()}" f"Error updating Prisma database: {traceback.format_exc()}"
) )
@staticmethod
async def _update_transaction_list(
response_cost: Optional[float],
entity_id: Optional[str],
transaction_list: dict,
entity_type: Litellm_EntityType,
debug_msg: Optional[str] = None,
prisma_client: Optional[PrismaClient] = None,
) -> bool:
"""
Common helper method to update a transaction list for an entity
Args:
response_cost: The cost to add
entity_id: The ID of the entity to update
transaction_list: The transaction list dictionary to update
entity_type: The type of entity (from EntityType enum)
debug_msg: Optional custom debug message
Returns:
bool: True if update happened, False otherwise
"""
try:
if debug_msg:
verbose_proxy_logger.debug(debug_msg)
else:
verbose_proxy_logger.debug(
f"adding spend to {entity_type.value} db. Response cost: {response_cost}. {entity_type.value}_id: {entity_id}."
)
if prisma_client is None:
return False
if entity_id is None:
verbose_proxy_logger.debug(
f"track_cost_callback: {entity_type.value}_id is None. Not tracking spend for {entity_type.value}"
)
return False
transaction_list[entity_id] = response_cost + transaction_list.get(
entity_id, 0
)
return True
except Exception as e:
verbose_proxy_logger.info(
f"Update {entity_type.value.capitalize()} DB failed to execute - {str(e)}\n{traceback.format_exc()}"
)
raise e
@staticmethod
async def _update_key_db( async def _update_key_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
hashed_token: Optional[str], hashed_token: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -193,13 +153,12 @@ class DBSpendUpdateWriter:
if hashed_token is None or prisma_client is None: if hashed_token is None or prisma_client is None:
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=hashed_token,
transaction_list=prisma_client.key_list_transactions,
entity_type=Litellm_EntityType.KEY, entity_type=Litellm_EntityType.KEY,
debug_msg=f"adding spend to key db. Response cost: {response_cost}. Token: {hashed_token}.", entity_id=hashed_token,
prisma_client=prisma_client, response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.exception( verbose_proxy_logger.exception(
@ -207,8 +166,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _update_user_db( async def _update_user_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
user_id: Optional[str], user_id: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -234,21 +193,21 @@ class DBSpendUpdateWriter:
for _id in user_ids: for _id in user_ids:
if _id is not None: if _id is not None:
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=_id,
transaction_list=prisma_client.user_list_transactions,
entity_type=Litellm_EntityType.USER, entity_type=Litellm_EntityType.USER,
prisma_client=prisma_client, entity_id=_id,
response_cost=response_cost,
)
) )
if end_user_id is not None: if end_user_id is not None:
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=end_user_id,
transaction_list=prisma_client.end_user_list_transactions,
entity_type=Litellm_EntityType.END_USER, entity_type=Litellm_EntityType.END_USER,
prisma_client=prisma_client, entity_id=end_user_id,
response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.info( verbose_proxy_logger.info(
@ -256,8 +215,8 @@ class DBSpendUpdateWriter:
+ f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}" + f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}"
) )
@staticmethod
async def _update_team_db( async def _update_team_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
team_id: Optional[str], team_id: Optional[str],
user_id: Optional[str], user_id: Optional[str],
@ -270,12 +229,12 @@ class DBSpendUpdateWriter:
) )
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=team_id,
transaction_list=prisma_client.team_list_transactions,
entity_type=Litellm_EntityType.TEAM, entity_type=Litellm_EntityType.TEAM,
prisma_client=prisma_client, entity_id=team_id,
response_cost=response_cost,
)
) )
try: try:
@ -283,12 +242,12 @@ class DBSpendUpdateWriter:
if user_id is not None: if user_id is not None:
# key is "team_id::<value>::user_id::<value>" # key is "team_id::<value>::user_id::<value>"
team_member_key = f"team_id::{team_id}::user_id::{user_id}" team_member_key = f"team_id::{team_id}::user_id::{user_id}"
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=team_member_key,
transaction_list=prisma_client.team_member_list_transactions,
entity_type=Litellm_EntityType.TEAM_MEMBER, entity_type=Litellm_EntityType.TEAM_MEMBER,
prisma_client=prisma_client, entity_id=team_member_key,
response_cost=response_cost,
)
) )
except Exception: except Exception:
pass pass
@ -298,8 +257,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _update_org_db( async def _update_org_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
org_id: Optional[str], org_id: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -311,12 +270,12 @@ class DBSpendUpdateWriter:
) )
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=org_id,
transaction_list=prisma_client.org_list_transactions,
entity_type=Litellm_EntityType.ORGANIZATION, entity_type=Litellm_EntityType.ORGANIZATION,
prisma_client=prisma_client, entity_id=org_id,
response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.info( verbose_proxy_logger.info(
@ -324,8 +283,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _insert_spend_log_to_db( async def _insert_spend_log_to_db(
self,
kwargs: Optional[dict], kwargs: Optional[dict],
completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]], completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]],
start_time: Optional[datetime], start_time: Optional[datetime],
@ -346,7 +305,7 @@ class DBSpendUpdateWriter:
end_time=end_time, end_time=end_time,
) )
payload["spend"] = response_cost or 0.0 payload["spend"] = response_cost or 0.0
DBSpendUpdateWriter._set_spend_logs_payload( await self._set_spend_logs_payload(
payload=payload, payload=payload,
spend_logs_url=os.getenv("SPEND_LOGS_URL"), spend_logs_url=os.getenv("SPEND_LOGS_URL"),
prisma_client=prisma_client, prisma_client=prisma_client,
@ -357,8 +316,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod async def _set_spend_logs_payload(
def _set_spend_logs_payload( self,
payload: Union[dict, SpendLogsPayload], payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient, prisma_client: PrismaClient,
spend_logs_url: Optional[str] = None, spend_logs_url: Optional[str] = None,
@ -377,8 +336,9 @@ class DBSpendUpdateWriter:
elif prisma_client is not None: elif prisma_client is not None:
prisma_client.spend_log_transactions.append(payload) prisma_client.spend_log_transactions.append(payload)
prisma_client.add_spend_log_transaction_to_daily_user_transaction( await self.add_spend_log_transaction_to_daily_user_transaction(
payload.copy() payload=payload.copy(),
prisma_client=prisma_client,
) )
return prisma_client return prisma_client
@ -435,7 +395,8 @@ class DBSpendUpdateWriter:
- Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB) - Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB)
""" """
await self.redis_update_buffer.store_in_memory_spend_updates_in_redis( await self.redis_update_buffer.store_in_memory_spend_updates_in_redis(
prisma_client=prisma_client, spend_update_queue=self.spend_update_queue,
daily_spend_update_queue=self.daily_spend_update_queue,
) )
# Only commit from redis to db if this pod is the leader # Only commit from redis to db if this pod is the leader
@ -447,12 +408,23 @@ class DBSpendUpdateWriter:
await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer() await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer()
) )
if db_spend_update_transactions is not None: if db_spend_update_transactions is not None:
await DBSpendUpdateWriter._commit_spend_updates_to_db( await self._commit_spend_updates_to_db(
prisma_client=prisma_client, prisma_client=prisma_client,
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions, db_spend_update_transactions=db_spend_update_transactions,
) )
daily_spend_update_transactions = (
await self.redis_update_buffer.get_all_daily_spend_update_transactions_from_redis_buffer()
)
if daily_spend_update_transactions is not None:
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
except Exception as e: except Exception as e:
verbose_proxy_logger.error(f"Error committing spend updates: {e}") verbose_proxy_logger.error(f"Error committing spend updates: {e}")
finally: finally:
@ -471,23 +443,34 @@ class DBSpendUpdateWriter:
Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS. Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS.
""" """
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions, # Aggregate all in memory spend updates (key, user, end_user, team, team_member, org) and commit to db
end_user_list_transactions=prisma_client.end_user_list_transactions, ################## Spend Update Transactions ##################
key_list_transactions=prisma_client.key_list_transactions, db_spend_update_transactions = (
team_list_transactions=prisma_client.team_list_transactions, await self.spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
team_member_list_transactions=prisma_client.team_member_list_transactions,
org_list_transactions=prisma_client.org_list_transactions,
) )
await DBSpendUpdateWriter._commit_spend_updates_to_db( await self._commit_spend_updates_to_db(
prisma_client=prisma_client, prisma_client=prisma_client,
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions, db_spend_update_transactions=db_spend_update_transactions,
) )
@staticmethod ################## Daily Spend Update Transactions ##################
# Aggregate all in memory daily spend transactions and commit to db
daily_spend_update_transactions = (
await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
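A generic illustration (not the litellm implementation) of the "only the leader pod commits" idea used in the Redis-buffered flow above: a shared lock key is claimed atomically, and only the claimant drains the buffer into the database. The dict below stands in for a Redis key with SET NX semantics; the key name and pod ids are placeholders.

```
import uuid

lock_store: dict = {}  # stand-in for a Redis key with SET NX semantics


def try_acquire_leader_lock(pod_id: str, key: str = "spend_commit_lock") -> bool:
    if key in lock_store:
        return lock_store[key] == pod_id  # already held (possibly by this pod)
    lock_store[key] = pod_id  # ~ SET key pod_id NX EX <ttl>
    return True


pod_a, pod_b = str(uuid.uuid4()), str(uuid.uuid4())
print(try_acquire_leader_lock(pod_a))  # True  -> pod A commits buffered spend to the DB
print(try_acquire_leader_lock(pod_b))  # False -> pod B skips this cycle
```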
async def _commit_spend_updates_to_db( # noqa: PLR0915 async def _commit_spend_updates_to_db( # noqa: PLR0915
self,
prisma_client: PrismaClient, prisma_client: PrismaClient,
n_retry_times: int, n_retry_times: int,
proxy_logging_obj: ProxyLogging, proxy_logging_obj: ProxyLogging,
@ -526,9 +509,6 @@ class DBSpendUpdateWriter:
where={"user_id": user_id}, where={"user_id": user_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.user_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -561,6 +541,7 @@ class DBSpendUpdateWriter:
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
prisma_client=prisma_client, prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
end_user_list_transactions=end_user_list_transactions,
) )
### UPDATE KEY TABLE ### ### UPDATE KEY TABLE ###
key_list_transactions = db_spend_update_transactions["key_list_transactions"] key_list_transactions = db_spend_update_transactions["key_list_transactions"]
@ -583,9 +564,6 @@ class DBSpendUpdateWriter:
where={"token": token}, where={"token": token},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.key_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -632,9 +610,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id}, where={"team_id": team_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.team_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -684,9 +659,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id, "user_id": user_id}, where={"team_id": team_id, "user_id": user_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.team_member_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -725,9 +697,6 @@ class DBSpendUpdateWriter:
where={"organization_id": org_id}, where={"organization_id": org_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.org_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -744,3 +713,192 @@ class DBSpendUpdateWriter:
_raise_failed_update_spend_exception( _raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
daily_spend_transactions: Dict[str, DailyUserSpendTransaction],
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
from litellm.proxy.utils import _raise_failed_update_spend_exception
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(len(daily_spend_transactions))
)
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(daily_spend_transactions.items())[:BATCH_SIZE]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
"successful_requests": transaction[
"successful_requests"
],
"failed_requests": transaction[
"failed_requests"
],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
"successful_requests": {
"increment": transaction[
"successful_requests"
]
},
"failed_requests": {
"increment": transaction["failed_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
daily_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
daily_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
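A standalone sketch of the batch-and-retry pattern update_daily_user_spend uses: take up to BATCH_SIZE pending rows, attempt the commit, back off exponentially on transient connection errors, and only drop rows that were actually processed. commit_batch below is a placeholder for the Prisma batch upsert, and ConnectionError stands in for DB_CONNECTION_ERROR_TYPES.

```
import asyncio

BATCH_SIZE = 100


async def commit_batch(batch: dict) -> None:
    # placeholder for the prisma_client.db.batch_() upserts
    print(f"committing {len(batch)} rows")


async def flush_with_retries(pending: dict, n_retry_times: int = 3) -> None:
    for attempt in range(n_retry_times + 1):
        batch = dict(list(pending.items())[:BATCH_SIZE])
        if not batch:
            return  # nothing buffered
        try:
            await commit_batch(batch)
            for key in batch:
                pending.pop(key, None)  # remove only the rows that were processed
            return
        except ConnectionError:
            if attempt >= n_retry_times:
                raise
            await asyncio.sleep(2 ** attempt)  # exponential backoff


asyncio.run(flush_with_retries({"k1": {"spend": 0.1}, "k2": {"spend": 0.2}}))
# -> "committing 2 rows"
```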
async def add_spend_log_transaction_to_daily_user_transaction(
self,
payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient,
):
"""
Add a spend log transaction to the `daily_spend_update_queue`
Key = @@unique([user_id, date, api_key, model, custom_llm_provider])
If key exists, update the transaction with the new spend and usage
"""
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
request_status = prisma_client.get_request_status(payload)
verbose_proxy_logger.info(f"Logged request status: {request_status}")
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
daily_transaction = DailyUserSpendTransaction(
user_id=payload["user"],
date=date,
api_key=payload["api_key"],
model=payload["model"],
model_group=payload["model_group"],
custom_llm_provider=payload["custom_llm_provider"],
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
successful_requests=1 if request_status == "success" else 0,
failed_requests=1 if request_status != "success" else 0,
)
await self.daily_spend_update_queue.add_update(
update={daily_transaction_key: daily_transaction}
)
except Exception as e:
raise e
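A standalone sketch of how the daily_transaction_key is derived from a spend-log payload in the method above; the payload values are illustrative, not taken from a real request.

```
from datetime import datetime

payload = {
    "user": "user1",
    "startTime": datetime(2024, 1, 1, 12, 30),
    "api_key": "sk-hash-1",
    "model": "gpt-4o",
    "custom_llm_provider": "openai",
}

start = payload["startTime"]
# datetime -> isoformat -> date part; strings are split on "T" directly
date = start.isoformat().split("T")[0] if isinstance(start, datetime) else str(start).split("T")[0]
daily_transaction_key = (
    f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
)
print(daily_transaction_key)  # user1_2024-01-01_sk-hash-1_gpt-4o_openai
```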


@ -0,0 +1,25 @@
"""
Base class for in memory buffer for database transactions
"""
import asyncio
from litellm._logging import verbose_proxy_logger
class BaseUpdateQueue:
"""Base class for in memory buffer for database transactions"""
def __init__(self):
self.update_queue = asyncio.Queue()
async def add_update(self, update):
"""Enqueue an update."""
verbose_proxy_logger.debug("Adding update to queue: %s", update)
await self.update_queue.put(update)
async def flush_all_updates_from_in_memory_queue(self):
"""Get all updates from the queue."""
updates = []
while not self.update_queue.empty():
updates.append(await self.update_queue.get())
return updates
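A minimal, standalone sketch of the buffer pattern this base class introduces: producers enqueue updates without blocking the request path, and a periodic flush drains everything currently buffered in one pass. The class below is a simplified stand-in (no litellm logging), not the shipped implementation.

```
import asyncio


class InMemoryUpdateQueue:
    """Simplified stand-in for BaseUpdateQueue."""

    def __init__(self):
        self.update_queue: asyncio.Queue = asyncio.Queue()

    async def add_update(self, update):
        await self.update_queue.put(update)

    async def flush_all_updates_from_in_memory_queue(self):
        updates = []
        while not self.update_queue.empty():
            updates.append(await self.update_queue.get())
        return updates


async def main():
    queue = InMemoryUpdateQueue()
    await queue.add_update({"entity_type": "key", "entity_id": "sk-1", "response_cost": 0.002})
    await queue.add_update({"entity_type": "user", "entity_id": "u-1", "response_cost": 0.001})
    print(await queue.flush_all_updates_from_in_memory_queue())  # both updates, FIFO order


asyncio.run(main())
```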


@ -0,0 +1,95 @@
import asyncio
from typing import Dict, List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import DailyUserSpendTransaction
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class DailySpendUpdateQueue(BaseUpdateQueue):
"""
In memory buffer for daily spend updates that should be committed to the database
To add a new daily spend update transaction, use the following format:
daily_spend_update_queue.add_update({
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
}
})
Queue contains a list of daily spend update transactions
eg
queue = [
{
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
},
{
"user2_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
}
]
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[
Dict[str, DailyUserSpendTransaction]
] = asyncio.Queue()
async def flush_and_get_aggregated_daily_spend_update_transactions(
self,
) -> Dict[str, DailyUserSpendTransaction]:
"""Get all updates from the queue and return all updates aggregated by daily_transaction_key."""
updates = await self.flush_all_updates_from_in_memory_queue()
aggregated_daily_spend_update_transactions = (
DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
)
verbose_proxy_logger.debug(
"Aggregated daily spend update transactions: %s",
aggregated_daily_spend_update_transactions,
)
return aggregated_daily_spend_update_transactions
@staticmethod
def get_aggregated_daily_spend_update_transactions(
updates: List[Dict[str, DailyUserSpendTransaction]]
) -> Dict[str, DailyUserSpendTransaction]:
"""Aggregate updates by daily_transaction_key."""
aggregated_daily_spend_update_transactions: Dict[
str, DailyUserSpendTransaction
] = {}
for _update in updates:
for _key, payload in _update.items():
if _key in aggregated_daily_spend_update_transactions:
daily_transaction = aggregated_daily_spend_update_transactions[_key]
daily_transaction["spend"] += payload["spend"]
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
daily_transaction["completion_tokens"] += payload[
"completion_tokens"
]
daily_transaction["api_requests"] += payload["api_requests"]
daily_transaction["successful_requests"] += payload[
"successful_requests"
]
daily_transaction["failed_requests"] += payload["failed_requests"]
else:
aggregated_daily_spend_update_transactions[_key] = payload
return aggregated_daily_spend_update_transactions
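A standalone sketch of the aggregation performed by get_aggregated_daily_spend_update_transactions: queued updates that share a daily_transaction_key collapse into one row with summed counters. Plain dicts stand in for DailyUserSpendTransaction, and the key and counter values are illustrative.

```
from typing import Dict, List

COUNTERS = ("spend", "prompt_tokens", "completion_tokens",
            "api_requests", "successful_requests", "failed_requests")


def aggregate(updates: List[Dict[str, dict]]) -> Dict[str, dict]:
    out: Dict[str, dict] = {}
    for update in updates:
        for key, txn in update.items():
            if key in out:
                for field in COUNTERS:
                    out[key][field] += txn[field]  # same key -> sum the counters
            else:
                out[key] = dict(txn)  # first time this key is seen
    return out


base = {"spend": 0.5, "prompt_tokens": 100, "completion_tokens": 20,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0}
updates = [
    {"user1_2024-01-01_sk-hash_gpt-4o_openai": dict(base)},
    {"user1_2024-01-01_sk-hash_gpt-4o_openai": dict(base)},
]
print(aggregate(updates))
# -> one key, spend == 1.0, prompt_tokens == 200, api_requests == 2, ...
```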


@ -9,9 +9,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.caching import RedisCache from litellm.caching import RedisCache
from litellm.constants import MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_UPDATE_BUFFER_KEY from litellm.constants import (
MAX_REDIS_BUFFER_DEQUEUE_COUNT,
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
REDIS_UPDATE_BUFFER_KEY,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy._types import DBSpendUpdateTransactions from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
from litellm.secret_managers.main import str_to_bool from litellm.secret_managers.main import str_to_bool
if TYPE_CHECKING: if TYPE_CHECKING:
@ -54,11 +62,17 @@ class RedisUpdateBuffer:
async def store_in_memory_spend_updates_in_redis( async def store_in_memory_spend_updates_in_redis(
self, self,
prisma_client: PrismaClient, spend_update_queue: SpendUpdateQueue,
daily_spend_update_queue: DailySpendUpdateQueue,
): ):
""" """
Stores the in-memory spend updates to Redis Stores the in-memory spend updates to Redis
Stores the following in memory data structures in Redis:
- SpendUpdateQueue - Key, User, Team, TeamMember, Org, EndUser Spend updates
- DailySpendUpdateQueue - Daily Spend updates Aggregate view
For SpendUpdateQueue:
Each transaction is a dict stored as following: Each transaction is a dict stored as following:
- key is the entity id - key is the entity id
- value is the spend amount - value is the spend amount
@ -72,19 +86,46 @@ class RedisUpdateBuffer:
"0929880203": 0.001, "0929880203": 0.001,
] ]
``` ```
For DailySpendUpdateQueue:
Each transaction is a Dict[str, DailyUserSpendTransaction] stored as following:
- key is the daily_transaction_key
- value is the DailyUserSpendTransaction
```
Redis List:
daily_spend_update_transactions:
[
{
"user_keyhash_1_model_1": {
"spend": 1.2,
"prompt_tokens": 1000,
"completion_tokens": 1000,
"api_requests": 1000,
"successful_requests": 1000,
},
}
]
```
""" """
if self.redis_cache is None: if self.redis_cache is None:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
"redis_cache is None, skipping store_in_memory_spend_updates_in_redis" "redis_cache is None, skipping store_in_memory_spend_updates_in_redis"
) )
return return
db_spend_update_transactions: DBSpendUpdateTransactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions, db_spend_update_transactions = (
end_user_list_transactions=prisma_client.end_user_list_transactions, await spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
key_list_transactions=prisma_client.key_list_transactions, )
team_list_transactions=prisma_client.team_list_transactions, verbose_proxy_logger.debug(
team_member_list_transactions=prisma_client.team_member_list_transactions, "ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
org_list_transactions=prisma_client.org_list_transactions, )
daily_spend_update_transactions = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
verbose_proxy_logger.debug(
"ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions
) )
# only store in redis if there are any updates to commit # only store in redis if there are any updates to commit
@ -100,8 +141,13 @@ class RedisUpdateBuffer:
values=list_of_transactions, values=list_of_transactions,
) )
# clear the in-memory spend updates list_of_daily_spend_update_transactions = [
RedisUpdateBuffer._clear_all_in_memory_spend_updates(prisma_client) safe_dumps(daily_spend_update_transactions)
]
await self.redis_cache.async_rpush(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
values=list_of_daily_spend_update_transactions,
)
@staticmethod @staticmethod
def _number_of_transactions_to_store_in_redis( def _number_of_transactions_to_store_in_redis(
@ -116,20 +162,6 @@ class RedisUpdateBuffer:
num_transactions += len(v) num_transactions += len(v)
return num_transactions return num_transactions
@staticmethod
def _clear_all_in_memory_spend_updates(
prisma_client: PrismaClient,
):
"""
Clears all in-memory spend updates
"""
prisma_client.user_list_transactions = {}
prisma_client.end_user_list_transactions = {}
prisma_client.key_list_transactions = {}
prisma_client.team_list_transactions = {}
prisma_client.team_member_list_transactions = {}
prisma_client.org_list_transactions = {}
@staticmethod @staticmethod
def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]: def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]:
""" """
@ -197,6 +229,27 @@ class RedisUpdateBuffer:
return combined_transaction return combined_transaction
async def get_all_daily_spend_update_transactions_from_redis_buffer(
self,
) -> Optional[Dict[str, DailyUserSpendTransaction]]:
"""
Gets all the daily spend update transactions from Redis
"""
if self.redis_cache is None:
return None
list_of_transactions = await self.redis_cache.async_lpop(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
count=MAX_REDIS_BUFFER_DEQUEUE_COUNT,
)
if list_of_transactions is None:
return None
list_of_daily_spend_update_transactions = [
json.loads(transaction) for transaction in list_of_transactions
]
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
list_of_daily_spend_update_transactions
)
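A standalone sketch of the Redis round trip these buffer methods perform, with a plain Python list standing in for the Redis list (async_rpush appends serialized blobs, async_lpop pops them from the head) and json.dumps standing in for safe_dumps. Key names and values are illustrative.

```
import json

redis_list: list = []  # stand-in for the REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY list

# writer pod: serialize the aggregated daily transactions and push them
daily_txns = {
    "user1_2024-01-01_sk-hash_gpt-4o_openai": {
        "spend": 0.5, "prompt_tokens": 100, "completion_tokens": 20,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0,
    }
}
redis_list.append(json.dumps(daily_txns))  # ~ async_rpush(key=..., values=[safe_dumps(...)])

# leader pod: pop everything buffered, parse, and re-aggregate before the DB write
popped = [redis_list.pop(0) for _ in range(len(redis_list))]  # ~ async_lpop(key=..., count=N)
parsed = [json.loads(blob) for blob in popped]
print(parsed)
```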
@staticmethod @staticmethod
def _parse_list_of_transactions( def _parse_list_of_transactions(
list_of_transactions: Union[Any, List[Any]], list_of_transactions: Union[Any, List[Any]],


@ -0,0 +1,113 @@
import asyncio
from typing import List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import (
DBSpendUpdateTransactions,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class SpendUpdateQueue(BaseUpdateQueue):
"""
In memory buffer for spend updates that should be committed to the database
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[SpendUpdateQueueItem] = asyncio.Queue()
async def flush_and_get_aggregated_db_spend_update_transactions(
self,
) -> DBSpendUpdateTransactions:
"""Flush all updates from the queue and return all updates aggregated by entity type."""
updates = await self.flush_all_updates_from_in_memory_queue()
verbose_proxy_logger.debug("Aggregating updates by entity type: %s", updates)
return self.get_aggregated_db_spend_update_transactions(updates)
def get_aggregated_db_spend_update_transactions(
self, updates: List[SpendUpdateQueueItem]
) -> DBSpendUpdateTransactions:
"""Aggregate updates by entity type."""
# Initialize all transaction lists as empty dicts
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions={},
end_user_list_transactions={},
key_list_transactions={},
team_list_transactions={},
team_member_list_transactions={},
org_list_transactions={},
)
# Map entity types to their corresponding transaction dictionary keys
entity_type_to_dict_key = {
Litellm_EntityType.USER: "user_list_transactions",
Litellm_EntityType.END_USER: "end_user_list_transactions",
Litellm_EntityType.KEY: "key_list_transactions",
Litellm_EntityType.TEAM: "team_list_transactions",
Litellm_EntityType.TEAM_MEMBER: "team_member_list_transactions",
Litellm_EntityType.ORGANIZATION: "org_list_transactions",
}
for update in updates:
entity_type = update.get("entity_type")
entity_id = update.get("entity_id") or ""
response_cost = update.get("response_cost") or 0
if entity_type is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is None",
update,
)
continue
dict_key = entity_type_to_dict_key.get(entity_type)
if dict_key is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is not in entity_type_to_dict_key",
update,
)
continue # Skip unknown entity types
# Type-safe access using if/elif statements
if dict_key == "user_list_transactions":
transactions_dict = db_spend_update_transactions[
"user_list_transactions"
]
elif dict_key == "end_user_list_transactions":
transactions_dict = db_spend_update_transactions[
"end_user_list_transactions"
]
elif dict_key == "key_list_transactions":
transactions_dict = db_spend_update_transactions[
"key_list_transactions"
]
elif dict_key == "team_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_list_transactions"
]
elif dict_key == "team_member_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_member_list_transactions"
]
elif dict_key == "org_list_transactions":
transactions_dict = db_spend_update_transactions[
"org_list_transactions"
]
else:
continue
if transactions_dict is None:
transactions_dict = {}
# type ignore is safe: dict_key is guaranteed to be one of "user_list_transactions", "end_user_list_transactions", "key_list_transactions", "team_list_transactions", "team_member_list_transactions", "org_list_transactions"
db_spend_update_transactions[dict_key] = transactions_dict # type: ignore
if entity_id not in transactions_dict:
transactions_dict[entity_id] = 0
transactions_dict[entity_id] += response_cost or 0
return db_spend_update_transactions
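A standalone sketch of the per-entity aggregation above: queue items are bucketed by entity type and costs for the same entity_id are summed. The lower-case strings stand in for the Litellm_EntityType enum values, and plain dicts stand in for SpendUpdateQueueItem / DBSpendUpdateTransactions.

```
from typing import Dict, List

ENTITY_TO_BUCKET = {
    "user": "user_list_transactions",
    "end_user": "end_user_list_transactions",
    "key": "key_list_transactions",
    "team": "team_list_transactions",
    "team_member": "team_member_list_transactions",
    "organization": "org_list_transactions",
}


def aggregate(updates: List[dict]) -> Dict[str, Dict[str, float]]:
    # one {entity_id: summed_cost} dict per bucket
    buckets: Dict[str, Dict[str, float]] = {name: {} for name in ENTITY_TO_BUCKET.values()}
    for update in updates:
        bucket_name = ENTITY_TO_BUCKET.get(update.get("entity_type"))
        if bucket_name is None:
            continue  # unknown entity types are skipped, mirroring the code above
        entity_id = update.get("entity_id") or ""
        cost = update.get("response_cost") or 0.0
        buckets[bucket_name][entity_id] = buckets[bucket_name].get(entity_id, 0.0) + cost
    return buckets


print(aggregate([
    {"entity_type": "key", "entity_id": "sk-hash-1", "response_cost": 0.002},
    {"entity_type": "key", "entity_id": "sk-hash-1", "response_cost": 0.003},
    {"entity_type": "team", "entity_id": "team-a", "response_cost": 0.001},
]))
# costs for "sk-hash-1" are summed under key_list_transactions; "team-a" keeps its single entry
```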


@ -0,0 +1,15 @@
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
general_settings:
use_redis_transaction_buffer: true
litellm_settings:
cache: True
cache_params:
type: redis
supported_call_types: []


@ -14,6 +14,7 @@ from pydantic import BaseModel
from websockets.asyncio.client import ClientConnection, connect from websockets.asyncio.client import ClientConnection, connect
from litellm import DualCache from litellm import DualCache
from litellm._version import version as litellm_version
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -75,7 +76,9 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]: ) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Pre-Call Hook") verbose_proxy_logger.debug("Inside AIM Pre-Call Hook")
await self.call_aim_guardrail(data, hook="pre_call") await self.call_aim_guardrail(
data, hook="pre_call", key_alias=user_api_key_dict.key_alias
)
return data return data
async def async_moderation_hook( async def async_moderation_hook(
@ -93,15 +96,18 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]: ) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Moderation Hook") verbose_proxy_logger.debug("Inside AIM Moderation Hook")
await self.call_aim_guardrail(data, hook="moderation") await self.call_aim_guardrail(
data, hook="moderation", key_alias=user_api_key_dict.key_alias
)
return data return data
async def call_aim_guardrail(self, data: dict, hook: str) -> None: async def call_aim_guardrail(
self, data: dict, hook: str, key_alias: Optional[str]
) -> None:
user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
headers = { headers = self._build_aim_headers(
"Authorization": f"Bearer {self.api_key}", hook=hook, key_alias=key_alias, user_email=user_email
"x-aim-litellm-hook": hook, )
} | ({"x-aim-user-email": user_email} if user_email else {})
response = await self.async_handler.post( response = await self.async_handler.post(
f"{self.api_base}/detect/openai", f"{self.api_base}/detect/openai",
headers=headers, headers=headers,
@ -120,18 +126,16 @@ class AimGuardrail(CustomGuardrail):
raise HTTPException(status_code=400, detail=res["detection_message"]) raise HTTPException(status_code=400, detail=res["detection_message"])
async def call_aim_guardrail_on_output( async def call_aim_guardrail_on_output(
self, request_data: dict, output: str, hook: str self, request_data: dict, output: str, hook: str, key_alias: Optional[str]
) -> Optional[str]: ) -> Optional[str]:
user_email = ( user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
) )
headers = {
"Authorization": f"Bearer {self.api_key}",
"x-aim-litellm-hook": hook,
} | ({"x-aim-user-email": user_email} if user_email else {})
response = await self.async_handler.post( response = await self.async_handler.post(
f"{self.api_base}/detect/output", f"{self.api_base}/detect/output",
headers=headers, headers=self._build_aim_headers(
hook=hook, key_alias=key_alias, user_email=user_email
),
json={"output": output, "messages": request_data.get("messages", [])}, json={"output": output, "messages": request_data.get("messages", [])},
) )
response.raise_for_status() response.raise_for_status()
@ -147,6 +151,32 @@ class AimGuardrail(CustomGuardrail):
return res["detection_message"] return res["detection_message"]
return None return None
def _build_aim_headers(
self, *, hook: str, key_alias: Optional[str], user_email: Optional[str]
):
"""
A helper function to build the http headers that are required by AIM guardrails.
"""
return (
{
"Authorization": f"Bearer {self.api_key}",
# Used by Aim to apply only the guardrails that should be applied in a specific request phase.
"x-aim-litellm-hook": hook,
# Used by Aim to track LiteLLM version and provide backward compatibility.
"x-aim-litellm-version": litellm_version,
}
# Used by Aim to track guardrails violations by user.
| ({"x-aim-user-email": user_email} if user_email else {})
| (
{
# Used by Aim to apply only the guardrails that are associated with the key alias.
"x-aim-litellm-key-alias": key_alias,
}
if key_alias
else {}
)
)
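A standalone sketch of the header set _build_aim_headers produces. build_aim_headers below mirrors the method as a free function; the API key, version string, and key alias are placeholders, and the optional entries are added only when they are set.

```
from typing import Optional


def build_aim_headers(api_key: str, litellm_version: str, hook: str,
                      key_alias: Optional[str], user_email: Optional[str]) -> dict:
    return (
        {
            "Authorization": f"Bearer {api_key}",
            "x-aim-litellm-hook": hook,
            "x-aim-litellm-version": litellm_version,
        }
        | ({"x-aim-user-email": user_email} if user_email else {})
        | ({"x-aim-litellm-key-alias": key_alias} if key_alias else {})
    )


print(build_aim_headers("aim-key", "1.65.0", "pre_call", key_alias="prod-key", user_email=None))
# {'Authorization': 'Bearer aim-key', 'x-aim-litellm-hook': 'pre_call',
#  'x-aim-litellm-version': '1.65.0', 'x-aim-litellm-key-alias': 'prod-key'}
```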
async def async_post_call_success_hook( async def async_post_call_success_hook(
self, self,
data: dict, data: dict,
@ -160,7 +190,7 @@ class AimGuardrail(CustomGuardrail):
): ):
content = response.choices[0].message.content or "" content = response.choices[0].message.content or ""
detection = await self.call_aim_guardrail_on_output( detection = await self.call_aim_guardrail_on_output(
data, content, hook="output" data, content, hook="output", key_alias=user_api_key_dict.key_alias
) )
if detection: if detection:
raise HTTPException(status_code=400, detail=detection) raise HTTPException(status_code=400, detail=detection)
@ -174,11 +204,13 @@ class AimGuardrail(CustomGuardrail):
user_email = ( user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
) )
headers = {
"Authorization": f"Bearer {self.api_key}",
} | ({"x-aim-user-email": user_email} if user_email else {})
async with connect( async with connect(
f"{self.ws_api_base}/detect/output/ws", additional_headers=headers f"{self.ws_api_base}/detect/output/ws",
additional_headers=self._build_aim_headers(
hook="output",
key_alias=user_api_key_dict.key_alias,
user_email=user_email,
),
) as websocket: ) as websocket:
sender = asyncio.create_task( sender = asyncio.create_task(
self.forward_the_stream_to_aim(websocket, response) self.forward_the_stream_to_aim(websocket, response)


@ -13,7 +13,6 @@ from litellm.litellm_core_utils.core_helpers import (
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.auth_checks import log_db_metrics from litellm.proxy.auth.auth_checks import log_db_metrics
from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
from litellm.proxy.utils import ProxyUpdateSpend from litellm.proxy.utils import ProxyUpdateSpend
from litellm.types.utils import ( from litellm.types.utils import (
StandardLoggingPayload, StandardLoggingPayload,
@ -37,6 +36,8 @@ class _ProxyDBLogger(CustomLogger):
if _ProxyDBLogger._should_track_errors_in_db() is False: if _ProxyDBLogger._should_track_errors_in_db() is False:
return return
from litellm.proxy.proxy_server import proxy_logging_obj
_metadata = dict( _metadata = dict(
StandardLoggingUserAPIKeyMetadata( StandardLoggingUserAPIKeyMetadata(
user_api_key_hash=user_api_key_dict.api_key, user_api_key_hash=user_api_key_dict.api_key,
@ -66,7 +67,7 @@ class _ProxyDBLogger(CustomLogger):
request_data.get("proxy_server_request") or {} request_data.get("proxy_server_request") or {}
) )
request_data["litellm_params"]["metadata"] = existing_metadata request_data["litellm_params"]["metadata"] = existing_metadata
await DBSpendUpdateWriter.update_database( await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key_dict.api_key, token=user_api_key_dict.api_key,
response_cost=0.0, response_cost=0.0,
user_id=user_api_key_dict.user_id, user_id=user_api_key_dict.user_id,
@ -136,7 +137,7 @@ class _ProxyDBLogger(CustomLogger):
end_user_id=end_user_id, end_user_id=end_user_id,
): ):
## UPDATE DATABASE ## UPDATE DATABASE
await DBSpendUpdateWriter.update_database( await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key, token=user_api_key,
response_cost=response_cost, response_cost=response_cost,
user_id=user_id, user_id=user_id,


@ -747,7 +747,10 @@ def _get_enforced_params(
enforced_params: Optional[list] = None enforced_params: Optional[list] = None
if general_settings is not None: if general_settings is not None:
enforced_params = general_settings.get("enforced_params") enforced_params = general_settings.get("enforced_params")
if "service_account_settings" in general_settings: if (
"service_account_settings" in general_settings
and check_if_token_is_service_account(user_api_key_dict) is True
):
service_account_settings = general_settings["service_account_settings"] service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings: if "enforced_params" in service_account_settings:
if enforced_params is None: if enforced_params is None:
@ -760,6 +763,20 @@ def _get_enforced_params(
return enforced_params return enforced_params
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
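A minimal illustration of the service-account check: a key whose metadata carries a service_account_id is treated as a service account. The small Token class below is a stand-in for UserAPIKeyAuth.

```
from typing import Optional


class Token:
    def __init__(self, metadata: Optional[dict] = None):
        self.metadata = metadata or {}


def check_if_token_is_service_account(valid_token: Token) -> bool:
    return bool(valid_token.metadata) and "service_account_id" in valid_token.metadata


print(check_if_token_is_service_account(Token({"service_account_id": "svc-123"})))  # True
print(check_if_token_is_service_account(Token({})))                                 # False
```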
def _enforced_params_check( def _enforced_params_check(
request_body: dict, request_body: dict,
general_settings: Optional[dict], general_settings: Optional[dict],


@ -1259,19 +1259,43 @@ class SpendMetrics(BaseModel):
prompt_tokens: int = Field(default=0) prompt_tokens: int = Field(default=0)
completion_tokens: int = Field(default=0) completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0) total_tokens: int = Field(default=0)
successful_requests: int = Field(default=0)
failed_requests: int = Field(default=0)
api_requests: int = Field(default=0) api_requests: int = Field(default=0)
class MetricBase(BaseModel):
metrics: SpendMetrics
class MetricWithMetadata(MetricBase):
metadata: Dict[str, Any] = Field(default_factory=dict)
class KeyMetadata(BaseModel):
"""Metadata for a key"""
key_alias: Optional[str] = None
class KeyMetricWithMetadata(MetricBase):
"""Base class for metrics with additional metadata"""
metadata: KeyMetadata = Field(default_factory=KeyMetadata)
class BreakdownMetrics(BaseModel): class BreakdownMetrics(BaseModel):
"""Breakdown of spend by different dimensions""" """Breakdown of spend by different dimensions"""
models: Dict[str, SpendMetrics] = Field(default_factory=dict) # model -> metrics models: Dict[str, MetricWithMetadata] = Field(
providers: Dict[str, SpendMetrics] = Field(
default_factory=dict default_factory=dict
) # provider -> metrics ) # model -> {metrics, metadata}
api_keys: Dict[str, SpendMetrics] = Field( providers: Dict[str, MetricWithMetadata] = Field(
default_factory=dict default_factory=dict
) # api_key -> metrics ) # provider -> {metrics, metadata}
api_keys: Dict[str, KeyMetricWithMetadata] = Field(
default_factory=dict
) # api_key -> {metrics, metadata}
class DailySpendData(BaseModel): class DailySpendData(BaseModel):
@ -1284,7 +1308,10 @@ class DailySpendMetadata(BaseModel):
total_spend: float = Field(default=0.0) total_spend: float = Field(default=0.0)
total_prompt_tokens: int = Field(default=0) total_prompt_tokens: int = Field(default=0)
total_completion_tokens: int = Field(default=0) total_completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0)
total_api_requests: int = Field(default=0) total_api_requests: int = Field(default=0)
total_successful_requests: int = Field(default=0)
total_failed_requests: int = Field(default=0)
page: int = Field(default=1) page: int = Field(default=1)
total_pages: int = Field(default=1) total_pages: int = Field(default=1)
has_more: bool = Field(default=False) has_more: bool = Field(default=False)
@ -1307,6 +1334,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
completion_tokens: int = 0 completion_tokens: int = 0
spend: float = 0.0 spend: float = 0.0
api_requests: int = 0 api_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
class GroupedData(TypedDict): class GroupedData(TypedDict):
@ -1322,34 +1351,57 @@ def update_metrics(
group_metrics.completion_tokens += record.completion_tokens group_metrics.completion_tokens += record.completion_tokens
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
group_metrics.api_requests += record.api_requests group_metrics.api_requests += record.api_requests
group_metrics.successful_requests += record.successful_requests
group_metrics.failed_requests += record.failed_requests
return group_metrics return group_metrics
def update_breakdown_metrics( def update_breakdown_metrics(
breakdown: BreakdownMetrics, record: LiteLLM_DailyUserSpend breakdown: BreakdownMetrics,
record: LiteLLM_DailyUserSpend,
model_metadata: Dict[str, Dict[str, Any]],
provider_metadata: Dict[str, Dict[str, Any]],
api_key_metadata: Dict[str, Dict[str, Any]],
) -> BreakdownMetrics: ) -> BreakdownMetrics:
"""Updates breakdown metrics for a single record using the existing update_metrics function""" """Updates breakdown metrics for a single record using the existing update_metrics function"""
# Update model breakdown # Update model breakdown
if record.model not in breakdown.models: if record.model not in breakdown.models:
breakdown.models[record.model] = SpendMetrics() breakdown.models[record.model] = MetricWithMetadata(
breakdown.models[record.model] = update_metrics( metrics=SpendMetrics(),
breakdown.models[record.model], record metadata=model_metadata.get(
record.model, {}
), # Add any model-specific metadata here
)
breakdown.models[record.model].metrics = update_metrics(
breakdown.models[record.model].metrics, record
) )
# Update provider breakdown # Update provider breakdown
provider = record.custom_llm_provider or "unknown" provider = record.custom_llm_provider or "unknown"
if provider not in breakdown.providers: if provider not in breakdown.providers:
breakdown.providers[provider] = SpendMetrics() breakdown.providers[provider] = MetricWithMetadata(
breakdown.providers[provider] = update_metrics( metrics=SpendMetrics(),
breakdown.providers[provider], record metadata=provider_metadata.get(
provider, {}
), # Add any provider-specific metadata here
)
breakdown.providers[provider].metrics = update_metrics(
breakdown.providers[provider].metrics, record
) )
# Update api key breakdown # Update api key breakdown
if record.api_key not in breakdown.api_keys: if record.api_key not in breakdown.api_keys:
breakdown.api_keys[record.api_key] = SpendMetrics() breakdown.api_keys[record.api_key] = KeyMetricWithMetadata(
breakdown.api_keys[record.api_key] = update_metrics( metrics=SpendMetrics(),
breakdown.api_keys[record.api_key], record metadata=KeyMetadata(
key_alias=api_key_metadata.get(record.api_key, {}).get(
"key_alias", None
)
), # Add any api_key-specific metadata here
)
breakdown.api_keys[record.api_key].metrics = update_metrics(
breakdown.api_keys[record.api_key].metrics, record
) )
return breakdown return breakdown
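A standalone sketch (illustrative values only) of the breakdown shape after this change: each model/provider/api_key bucket now nests its counters under "metrics" and carries a "metadata" object, with key_alias resolved for API keys. The hash and alias below are placeholders.

```
breakdown = {
    "models": {
        "gpt-4o": {
            "metrics": {"spend": 1.25, "prompt_tokens": 1000, "completion_tokens": 200,
                        "total_tokens": 1200, "api_requests": 10,
                        "successful_requests": 9, "failed_requests": 1},
            "metadata": {},
        },
    },
    "api_keys": {
        "88dc28..hash": {
            "metrics": {"spend": 1.25, "prompt_tokens": 1000, "completion_tokens": 200,
                        "total_tokens": 1200, "api_requests": 10,
                        "successful_requests": 9, "failed_requests": 1},
            "metadata": {"key_alias": "my-prod-key"},
        },
    },
}
print(breakdown["api_keys"]["88dc28..hash"]["metadata"]["key_alias"])  # my-prod-key
```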
@ -1428,6 +1480,14 @@ async def get_user_daily_activity(
if api_key: if api_key:
where_conditions["api_key"] = api_key where_conditions["api_key"] = api_key
if (
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
):
where_conditions[
"user_id"
] = user_api_key_dict.user_id # only allow access to own data
# Get total count for pagination # Get total count for pagination
total_count = await prisma_client.db.litellm_dailyuserspend.count( total_count = await prisma_client.db.litellm_dailyuserspend.count(
where=where_conditions where=where_conditions
@ -1443,6 +1503,28 @@ async def get_user_daily_activity(
take=page_size, take=page_size,
) )
daily_spend_data_pydantic_list = [
LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
]
# Get all unique API keys from the spend data
api_keys = set()
for record in daily_spend_data_pydantic_list:
if record.api_key:
api_keys.add(record.api_key)
# Fetch key aliases in bulk
api_key_metadata: Dict[str, Dict[str, Any]] = {}
model_metadata: Dict[str, Dict[str, Any]] = {}
provider_metadata: Dict[str, Dict[str, Any]] = {}
if api_keys:
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
where={"token": {"in": list(api_keys)}}
)
api_key_metadata.update(
{k.token: {"key_alias": k.key_alias} for k in key_records}
)
# Process results # Process results
results = [] results = []
total_metrics = SpendMetrics() total_metrics = SpendMetrics()
@ -1450,7 +1532,7 @@ async def get_user_daily_activity(
# Group data by date and other dimensions # Group data by date and other dimensions
grouped_data: Dict[str, Dict[str, Any]] = {} grouped_data: Dict[str, Dict[str, Any]] = {}
for record in daily_spend_data: for record in daily_spend_data_pydantic_list:
date_str = record.date date_str = record.date
if date_str not in grouped_data: if date_str not in grouped_data:
grouped_data[date_str] = { grouped_data[date_str] = {
@ -1464,7 +1546,11 @@ async def get_user_daily_activity(
) )
# Update breakdowns # Update breakdowns
grouped_data[date_str]["breakdown"] = update_breakdown_metrics( grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
grouped_data[date_str]["breakdown"], record grouped_data[date_str]["breakdown"],
record,
model_metadata,
provider_metadata,
api_key_metadata,
) )
# Update total metrics # Update total metrics
@ -1474,7 +1560,9 @@ async def get_user_daily_activity(
total_metrics.total_tokens += ( total_metrics.total_tokens += (
record.prompt_tokens + record.completion_tokens record.prompt_tokens + record.completion_tokens
) )
total_metrics.api_requests += 1 total_metrics.api_requests += record.api_requests
total_metrics.successful_requests += record.successful_requests
total_metrics.failed_requests += record.failed_requests
# Convert grouped data to response format # Convert grouped data to response format
for date_str, data in grouped_data.items(): for date_str, data in grouped_data.items():
@ -1495,7 +1583,10 @@ async def get_user_daily_activity(
total_spend=total_metrics.spend, total_spend=total_metrics.spend,
total_prompt_tokens=total_metrics.prompt_tokens, total_prompt_tokens=total_metrics.prompt_tokens,
total_completion_tokens=total_metrics.completion_tokens, total_completion_tokens=total_metrics.completion_tokens,
total_tokens=total_metrics.total_tokens,
total_api_requests=total_metrics.api_requests, total_api_requests=total_metrics.api_requests,
total_successful_requests=total_metrics.successful_requests,
total_failed_requests=total_metrics.failed_requests,
page=page, page=page,
total_pages=-(-total_count // page_size), # Ceiling division total_pages=-(-total_count // page_size), # Ceiling division
has_more=(page * page_size) < total_count, has_more=(page * page_size) < total_count,


@ -394,7 +394,7 @@ class ModelManagementAuthChecks:
@staticmethod @staticmethod
async def can_user_make_model_call( async def can_user_make_model_call(
model_params: Union[Deployment, updateDeployment], model_params: Deployment,
user_api_key_dict: UserAPIKeyAuth, user_api_key_dict: UserAPIKeyAuth,
prisma_client: PrismaClient, prisma_client: PrismaClient,
premium_user: bool, premium_user: bool,
@ -723,15 +723,6 @@ async def update_model(
}, },
) )
await ModelManagementAuthChecks.can_user_make_model_call(
model_params=model_params,
user_api_key_dict=user_api_key_dict,
prisma_client=prisma_client,
premium_user=premium_user,
)
# update DB
if store_model_in_db is True:
_model_id = None _model_id = None
_model_info = getattr(model_params, "model_info", None) _model_info = getattr(model_params, "model_info", None)
if _model_info is None: if _model_info is None:
@ -740,11 +731,13 @@ async def update_model(
_model_id = _model_info.id _model_id = _model_info.id
if _model_id is None: if _model_id is None:
raise Exception("model_info.id not provided") raise Exception("model_info.id not provided")
_existing_litellm_params = ( _existing_litellm_params = (
await prisma_client.db.litellm_proxymodeltable.find_unique( await prisma_client.db.litellm_proxymodeltable.find_unique(
where={"model_id": _model_id} where={"model_id": _model_id}
) )
) )
if _existing_litellm_params is None: if _existing_litellm_params is None:
if ( if (
llm_router is not None llm_router is not None
@ -756,7 +749,19 @@ async def update_model(
"error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit." "error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
}, },
) )
else:
raise Exception("model not found") raise Exception("model not found")
deployment = Deployment(**_existing_litellm_params.model_dump())
await ModelManagementAuthChecks.can_user_make_model_call(
model_params=deployment,
user_api_key_dict=user_api_key_dict,
prisma_client=prisma_client,
premium_user=premium_user,
)
# update DB
if store_model_in_db is True:
_existing_litellm_params_dict = dict( _existing_litellm_params_dict = dict(
_existing_litellm_params.litellm_params _existing_litellm_params.litellm_params
) )


@ -1,15 +1,6 @@
model_list: model_list:
- model_name: gpt-4o - model_name: fake-openai-endpoint
litellm_params: litellm_params:
model: openai/gpt-4o model: openai/fake
api_key: sk-xxxxxxx api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}


@ -3308,15 +3308,6 @@ async def model_list(
tags=["chat/completions"], tags=["chat/completions"],
responses={200: {"description": "Successful response"}, **ERROR_RESPONSES}, responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
) # azure compatible endpoint ) # azure compatible endpoint
@backoff.on_exception(
backoff.expo,
Exception, # base exception to catch for the backoff
max_tries=global_max_parallel_request_retries, # maximum number of retries
max_time=global_max_parallel_request_retry_timeout, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff
giveup=giveup,
logger=verbose_proxy_logger,
)
async def chat_completion( # noqa: PLR0915 async def chat_completion( # noqa: PLR0915
request: Request, request: Request,
fastapi_response: Response, fastapi_response: Response,


@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0) completion_tokens Int @default(0)
spend Float @default(0.0) spend Float @default(0.0)
api_requests Int @default(0) api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now()) created_at DateTime @default(now())
updated_at DateTime @updatedAt updated_at DateTime @updatedAt
@ -352,4 +354,3 @@ enum JobStatus {
INACTIVE INACTIVE
} }


@ -10,14 +10,24 @@ import traceback
from datetime import datetime, timedelta from datetime import datetime, timedelta
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, overload from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Literal,
Optional,
Union,
cast,
overload,
)
from litellm.proxy._types import ( from litellm.proxy._types import (
DB_CONNECTION_ERROR_TYPES, DB_CONNECTION_ERROR_TYPES,
CommonProxyErrors, CommonProxyErrors,
DailyUserSpendTransaction,
ProxyErrorTypes, ProxyErrorTypes,
ProxyException, ProxyException,
SpendLogsMetadata,
SpendLogsPayload, SpendLogsPayload,
) )
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -1100,14 +1110,7 @@ def jsonify_object(data: dict) -> dict:
class PrismaClient: class PrismaClient:
user_list_transactions: dict = {}
end_user_list_transactions: dict = {}
key_list_transactions: dict = {}
team_list_transactions: dict = {}
team_member_list_transactions: dict = {} # key is ["team_id" + "user_id"]
org_list_transactions: dict = {}
spend_log_transactions: List = [] spend_log_transactions: List = []
daily_user_spend_transactions: Dict[str, DailyUserSpendTransaction] = {}
def __init__( def __init__(
self, self,
@ -1145,62 +1148,40 @@ class PrismaClient:
) # Client to connect to Prisma db ) # Client to connect to Prisma db
verbose_proxy_logger.debug("Success - Created Prisma Client") verbose_proxy_logger.debug("Success - Created Prisma Client")
def add_spend_log_transaction_to_daily_user_transaction( def get_request_status(
self, payload: Union[dict, SpendLogsPayload] self, payload: Union[dict, SpendLogsPayload]
): ) -> Literal["success", "failure"]:
""" """
Add a spend log transaction to the daily user transaction list Determine if a request was successful or failed based on payload metadata.
Key = @@unique([user_id, date, api_key, model, custom_llm_provider]) ) Args:
payload (Union[dict, SpendLogsPayload]): Request payload containing metadata
If key exists, update the transaction with the new spend and usage Returns:
Literal["success", "failure"]: Request status
""" """
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try: try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" # Get metadata and convert to dict if it's a JSON string
if daily_transaction_key in self.daily_user_spend_transactions: payload_metadata: Union[Dict, SpendLogsMetadata, str] = payload.get(
daily_transaction = self.daily_user_spend_transactions[ "metadata", {}
daily_transaction_key )
] if isinstance(payload_metadata, str):
daily_transaction["spend"] += payload["spend"] payload_metadata_json: Union[Dict, SpendLogsMetadata] = cast(
daily_transaction["prompt_tokens"] += payload["prompt_tokens"] Dict, json.loads(payload_metadata)
daily_transaction["completion_tokens"] += payload["completion_tokens"] )
daily_transaction["api_requests"] += 1
else: else:
daily_transaction = DailyUserSpendTransaction( payload_metadata_json = payload_metadata
user_id=payload["user"],
date=date, # Check status in metadata dict
api_key=payload["api_key"], return (
model=payload["model"], "failure"
model_group=payload["model_group"], if payload_metadata_json.get("status") == "failure"
custom_llm_provider=payload["custom_llm_provider"], else "success"
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
) )
self.daily_user_spend_transactions[ except (json.JSONDecodeError, AttributeError):
daily_transaction_key # Default to success if metadata parsing fails
] = daily_transaction return "success"
except Exception as e:
raise e
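A standalone sketch of the get_request_status logic above: the spend-log payload's "metadata" may arrive as a dict or a JSON string, and anything that is not explicitly status == "failure" (including parse errors) is counted as success.

```
import json
from typing import Union


def get_request_status(payload: dict) -> str:
    try:
        metadata: Union[dict, str] = payload.get("metadata", {})
        if isinstance(metadata, str):
            metadata = json.loads(metadata)  # stringified metadata is parsed first
        return "failure" if metadata.get("status") == "failure" else "success"
    except (json.JSONDecodeError, AttributeError):
        return "success"  # default to success if metadata can't be parsed


print(get_request_status({"metadata": '{"status": "failure"}'}))  # failure
print(get_request_status({"metadata": {"status": "success"}}))    # success
print(get_request_status({"metadata": "not json"}))               # success (parse error defaults)
```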
def hash_token(self, token: str): def hash_token(self, token: str):
# Hash the string using SHA-256 # Hash the string using SHA-256
@ -2422,7 +2403,10 @@ def _hash_token_if_needed(token: str) -> str:
class ProxyUpdateSpend: class ProxyUpdateSpend:
@staticmethod @staticmethod
async def update_end_user_spend( async def update_end_user_spend(
n_retry_times: int, prisma_client: PrismaClient, proxy_logging_obj: ProxyLogging n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
end_user_list_transactions: Dict[str, float],
): ):
for i in range(n_retry_times + 1): for i in range(n_retry_times + 1):
start_time = time.time() start_time = time.time()
@ -2434,7 +2418,7 @@ class ProxyUpdateSpend:
for ( for (
end_user_id, end_user_id,
response_cost, response_cost,
) in prisma_client.end_user_list_transactions.items(): ) in end_user_list_transactions.items():
if litellm.max_end_user_budget is not None: if litellm.max_end_user_budget is not None:
pass pass
batcher.litellm_endusertable.upsert( batcher.litellm_endusertable.upsert(
@ -2461,10 +2445,6 @@ class ProxyUpdateSpend:
_raise_failed_update_spend_exception( _raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
finally:
prisma_client.end_user_list_transactions = (
{}
) # reset the end user list transactions - prevent bad data from causing issues
@staticmethod @staticmethod
async def update_spend_logs( async def update_spend_logs(
@ -2538,120 +2518,6 @@ class ProxyUpdateSpend:
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(prisma_client.daily_user_spend_transactions.items())[
:BATCH_SIZE
]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
prisma_client.daily_user_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
prisma_client.daily_user_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
@staticmethod
def disable_spend_updates() -> bool:
"""
@@ -2701,20 +2567,6 @@ async def update_spend( # noqa: PLR0915
db_writer_client=db_writer_client,
)
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(
len(prisma_client.daily_user_spend_transactions)
)
)
if len(prisma_client.daily_user_spend_transactions) > 0:
await ProxyUpdateSpend.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
)
def _raise_failed_update_spend_exception(
e: Exception, start_time: float, proxy_logging_obj: ProxyLogging


@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
from typing_extensions import TypeAlias
class AnthropicResponseTextBlock(TypedDict, total=False):
"""
Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
"""
citations: Optional[List[Dict[str, Any]]]
text: str
type: Literal["text"]
class AnthropicResponseToolUseBlock(TypedDict, total=False):
"""
Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
"""
id: Optional[str]
input: Optional[str]
name: Optional[str]
type: Literal["tool_use"]
class AnthropicResponseThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
"""
signature: Optional[str]
thinking: Optional[str]
type: Literal["thinking"]
class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
"""
data: Optional[str]
type: Literal["redacted_thinking"]
AnthropicResponseContentBlock: TypeAlias = Union[
AnthropicResponseTextBlock,
AnthropicResponseToolUseBlock,
AnthropicResponseThinkingBlock,
AnthropicResponseRedactedThinkingBlock,
]
class AnthropicUsage(TypedDict, total=False):
"""
Input and output tokens used in the request
"""
input_tokens: int
output_tokens: int
"""
Cache Tokens Used
"""
cache_creation_input_tokens: int
cache_read_input_tokens: int
class AnthropicMessagesResponse(TypedDict, total=False):
"""
Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
"""
content: Optional[List[AnthropicResponseContentBlock]]
id: str
model: Optional[str] # This represents the Model type from Anthropic
role: Optional[Literal["assistant"]]
stop_reason: Optional[
Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
]
stop_sequence: Optional[str]
type: Optional[Literal["message"]]
usage: Optional[AnthropicUsage]
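For reference, a minimal sketch of how these TypedDicts compose at runtime; since they use total=False every key is optional, and the ids, model name, and values below are placeholders, not output from the API:

# Illustrative only -- a plain dict that satisfies AnthropicMessagesResponse.
example_response = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet-20250219",
    "content": [
        {"type": "thinking", "thinking": "step-by-step reasoning", "signature": "sig"},
        {"type": "text", "text": "Hello!", "citations": None},
    ],
    "stop_reason": "end_turn",
    "usage": {"input_tokens": 10, "output_tokens": 5},
}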


@@ -1113,3 +1113,6 @@ ResponsesAPIStreamingResponse = Annotated[
],
Discriminator("type"),
]
REASONING_EFFORT = Literal["low", "medium", "high"]


@@ -0,0 +1,9 @@
import json
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
class OpenRouterErrorMessage(TypedDict):
message: str
code: int
metadata: Dict


@@ -5901,9 +5901,10 @@ class ModelResponseIterator:
class ModelResponseListIterator:
def __init__(self, model_responses):
def __init__(self, model_responses, delay: Optional[float] = None):
self.model_responses = model_responses
self.index = 0
self.delay = delay
# Sync iterator
def __iter__(self):
@@ -5914,6 +5915,8 @@ class ModelResponseListIterator:
raise StopIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
time.sleep(self.delay)
return model_response
# Async iterator
@@ -5925,6 +5928,8 @@ class ModelResponseListIterator:
raise StopAsyncIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
await asyncio.sleep(self.delay)
return model_response
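The new delay argument spaces out chunks so streaming timestamps can be asserted. A self-contained toy that mirrors the async side of this change (not litellm's actual class) might look like:

import asyncio
import time

class DelayedListIterator:
    # Toy mirror of the per-chunk delay added above; names are illustrative.
    def __init__(self, items, delay=None):
        self.items, self.index, self.delay = items, 0, delay

    def __aiter__(self):
        return self

    async def __anext__(self):
        if self.index >= len(self.items):
            raise StopAsyncIteration
        item = self.items[self.index]
        self.index += 1
        if self.delay:
            await asyncio.sleep(self.delay)  # simulate time between stream chunks
        return item

async def main():
    start = time.time()
    async for item in DelayedListIterator(["chunk-1", "chunk-2"], delay=0.1):
        print(f"{item} at +{time.time() - start:.2f}s")

asyncio.run(main())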


@@ -4453,6 +4453,42 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": { "gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 2097152, "max_input_tokens": 2097152,
@ -10189,6 +10225,22 @@
"litellm_provider": "voyage", "litellm_provider": "voyage",
"mode": "rerank" "mode": "rerank"
}, },
"databricks/databricks-claude-3-7-sonnet": {
"max_tokens": 200000,
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000025,
"input_dbu_cost_per_token": 0.00003571,
"output_cost_per_token": 0.00017857,
"output_db_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": { "databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000, "max_tokens": 128000,
"max_input_tokens": 128000, "max_input_tokens": 128000,
@ -10217,7 +10269,7 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}, "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_tool_choice": true "supports_tool_choice": true
}, },
"databricks/meta-llama-3.3-70b-instruct": { "databricks/databricks-meta-llama-3-3-70b-instruct": {
"max_tokens": 128000, "max_tokens": 128000,
"max_input_tokens": 128000, "max_input_tokens": 128000,
"max_output_tokens": 128000, "max_output_tokens": 128000,


@@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
namespace_packages = True
[mypy-google.*]
ignore_missing_imports = True

poetry.lock (generated)

@@ -1151,69 +1151,6 @@ files = [
[package.extras]
protobuf = ["grpcio-tools (>=1.70.0)"]
[[package]]
name = "grpcio"
version = "1.71.0"
description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
{file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"},
{file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"},
{file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"},
{file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"},
{file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"},
{file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"},
{file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"},
{file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"},
{file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"},
{file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"},
{file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"},
{file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"},
{file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"},
{file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"},
{file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"},
{file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"},
{file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"},
{file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"},
{file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
{file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
]
[package.extras]
protobuf = ["grpcio-tools (>=1.71.0)"]
[[package]]
name = "grpcio-status"
version = "1.70.0"
@@ -1230,22 +1167,6 @@ googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.70.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "grpcio-status"
version = "1.71.0"
description = "Status proto mapping for gRPC"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
{file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
]
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.71.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "gunicorn"
version = "23.0.0"
@@ -1678,13 +1599,13 @@ referencing = ">=0.31.0"
[[package]]
name = "litellm-proxy-extras"
version = "0.1.1"
version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
optional = true
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
files = [
{file = "litellm_proxy_extras-0.1.1-py3-none-any.whl", hash = "sha256:2b3c4c5474bacbde2424c1cd13b21f85c65e9c4346f6159badd49a210eedef5c"},
{file = "litellm_proxy_extras-0.1.2-py3-none-any.whl", hash = "sha256:2caa7bdba5a533cd1781b55e3f7c581138d2a5b68a7e6d737327669dd21d5e08"},
{file = "litellm_proxy_extras-0.1.1.tar.gz", hash = "sha256:a1eb911ad2e3742238863d314a8bd6d02dd0cc213ba040b2c0593f132fbf3117"},
{file = "litellm_proxy_extras-0.1.2.tar.gz", hash = "sha256:218e97980ab5a34eed7dcd1564a910c9a790168d672cdec3c464eba9b7cb1518"},
]
[[package]]
@@ -4135,4 +4056,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "16cbf20784776377805f5e33c6bc97dce76303132aa3d81c7e6fe743f0ee3fc1"
content-hash = "524b2f8276ba057f8dc8a79dd460c1a243ef4aece7c08a8bf344e029e07b8841"


@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.65.1"
version = "1.65.2"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true}
boto3 = {version = "1.34.34", optional = true}
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
mcp = {version = "1.5.0", optional = true, python = ">=3.10"}
litellm-proxy-extras = {version = "0.1.1", optional = true}
litellm-proxy-extras = {version = "0.1.2", optional = true}
[tool.poetry.extras]
proxy = [
@@ -117,7 +117,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.65.1"
version = "1.65.2"
version_files = [
"pyproject.toml:^version"
]


@@ -38,7 +38,7 @@ sentry_sdk==2.21.0 # for sentry error handling
detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
cryptography==43.0.1
tzdata==2025.1 # IANA time zone database
litellm-proxy-extras==0.1.1 # for proxy extras - e.g. prisma migrations
litellm-proxy-extras==0.1.2 # for proxy extras - e.g. prisma migrations
### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env


@@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
@@ -351,3 +353,4 @@ enum JobStatus {
ACTIVE
INACTIVE
}
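A hypothetical sketch of how the two new counters might be folded into the daily-spend upsert increments, mirroring the increment pattern in the batch job removed earlier in this diff; the actual write path is not part of this hunk and txn is a sample transaction:

# Hypothetical -- field names follow the schema columns above.
txn = {"prompt_tokens": 100, "completion_tokens": 50, "spend": 0.25,
       "api_requests": 1, "successful_requests": 1, "failed_requests": 0}
update_data = {
    "prompt_tokens": {"increment": txn["prompt_tokens"]},
    "completion_tokens": {"increment": txn["completion_tokens"]},
    "spend": {"increment": txn["spend"]},
    "api_requests": {"increment": txn["api_requests"]},
    "successful_requests": {"increment": txn.get("successful_requests", 0)},
    "failed_requests": {"increment": txn.get("failed_requests", 0)},
}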


@@ -1,6 +1,7 @@
import json
import os
import sys
import time
from unittest.mock import MagicMock, Mock, patch
import pytest
@@ -19,6 +20,7 @@ from litellm.types.utils import (
Delta,
ModelResponseStream,
PromptTokensDetailsWrapper,
StandardLoggingPayload,
StreamingChoices,
Usage,
)
@@ -36,6 +38,22 @@ def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
return streaming_handler
@pytest.fixture
def logging_obj() -> Logging:
import time
logging_obj = Logging(
model="my-random-model",
messages=[{"role": "user", "content": "Hey"}],
stream=True,
call_type="completion",
start_time=time.time(),
litellm_call_id="12345",
function_id="1245",
)
return logging_obj
bedrock_chunks = [
ModelResponseStream(
id="chatcmpl-d249def8-a78b-464c-87b5-3a6f43565292",
@@ -577,3 +595,36 @@ def test_streaming_handler_with_stop_chunk(
**args, model_response=ModelResponseStream()
)
assert returned_chunk is None
@pytest.mark.asyncio
async def test_streaming_completion_start_time(logging_obj: Logging):
"""Test that the start time is set correctly"""
from litellm.integrations.custom_logger import CustomLogger
class MockCallback(CustomLogger):
pass
mock_callback = MockCallback()
litellm.success_callback = [mock_callback, "langfuse"]
completion_stream = ModelResponseListIterator(
model_responses=bedrock_chunks, delay=0.1
)
response = CustomStreamWrapper(
completion_stream=completion_stream,
model="bedrock/claude-3-5-sonnet-20240620-v1:0",
logging_obj=logging_obj,
)
async for chunk in response:
print(chunk)
await asyncio.sleep(2)
assert logging_obj.model_call_details["completion_start_time"] is not None
assert (
logging_obj.model_call_details["completion_start_time"]
< logging_obj.model_call_details["end_time"]
)


@@ -0,0 +1,81 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.openrouter.chat.transformation import (
OpenRouterChatCompletionStreamingHandler,
OpenRouterException,
)
class TestOpenRouterChatCompletionStreamingHandler:
def test_chunk_parser_successful(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test input chunk
chunk = {
"id": "test_id",
"created": 1234567890,
"model": "test_model",
"choices": [
{"delta": {"content": "test content", "reasoning": "test reasoning"}}
],
}
# Parse chunk
result = handler.chunk_parser(chunk)
# Verify response
assert result.id == "test_id"
assert result.object == "chat.completion.chunk"
assert result.created == 1234567890
assert result.model == "test_model"
assert len(result.choices) == 1
assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning"
def test_chunk_parser_error_response(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test error chunk
error_chunk = {
"error": {
"message": "test error",
"code": 400,
"metadata": {"key": "value"},
"user_id": "test_user",
}
}
# Verify error handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(error_chunk)
assert "Message: test error" in str(exc_info.value)
assert exc_info.value.status_code == 400
def test_chunk_parser_key_error(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test invalid chunk missing required fields
invalid_chunk = {"incomplete": "data"}
# Verify KeyError handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(invalid_chunk)
assert "KeyError" in str(exc_info.value)
assert exc_info.value.status_code == 400


@@ -0,0 +1,97 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(0, os.path.abspath("../../../../.."))
from litellm.llms.sagemaker.common_utils import AWSEventStreamDecoder
@pytest.mark.asyncio
async def test_aiter_bytes_unicode_decode_error():
"""
Test that AWSEventStreamDecoder.aiter_bytes() does not raise an error when encountering invalid UTF-8 bytes. (UnicodeDecodeError)
Ensures stream processing continues despite the error.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
# Create an instance of AWSEventStreamDecoder
decoder = AWSEventStreamDecoder(model="test-model")
# Create a mock event that will trigger a UnicodeDecodeError
mock_event = MagicMock()
mock_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes
}
# Create a mock EventStreamBuffer that yields our mock event
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_event]
# Mock the EventStreamBuffer class
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
# Create an async generator that yields some test bytes
async def mock_iterator():
yield b""
# Process the stream
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
print("chunk=", chunk)
chunks.append(chunk)
# Verify that processing continued despite the error
# The chunks list should be empty since we only sent invalid data
assert len(chunks) == 0
@pytest.mark.asyncio
async def test_aiter_bytes_valid_chunk_followed_by_unicode_error():
"""
Test that valid chunks are processed correctly even when followed by Unicode decode errors.
This ensures errors don't corrupt or prevent processing of valid data that came before.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
decoder = AWSEventStreamDecoder(model="test-model")
# Create two mock events - first valid, then invalid
mock_valid_event = MagicMock()
mock_valid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": json.dumps({"token": {"text": "hello"}}).encode(), # Valid data first
}
mock_invalid_event = MagicMock()
mock_invalid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes second
}
# Create a mock EventStreamBuffer that yields valid event first, then invalid
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_valid_event, mock_invalid_event]
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
async def mock_iterator():
yield b"test_bytes"
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
chunks.append(chunk)
# Verify we got our valid chunk despite the subsequent error
assert len(chunks) == 1
assert chunks[0]["text"] == "hello" # Verify the content of the valid chunk


@@ -1,137 +0,0 @@
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
def test_anthropic_prompt_caching_headers_for_vertex():
"""
Test that the prompt caching beta header is correctly set for Vertex AI requests
with Anthropic models when cache control is present in the messages.
"""
# Create an instance of AnthropicConfig
config = AnthropicConfig()
# Test case 1: Vertex request with prompt caching
# Create a message with cache control
messages = [
{
"role": "system",
"content": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in messages"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
# Test case 2: Vertex request without prompt caching
messages_without_cache = [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages_without_cache)
assert is_cache_control_set is False, "Cache control should not be detected in messages"
# Generate headers for a Vertex AI request without prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is not set
assert "anthropic-beta" not in headers, "anthropic-beta header should not be present"
def test_anthropic_prompt_caching_with_content_blocks():
"""
Test that prompt caching is correctly detected when cache control is in content blocks.
"""
config = AnthropicConfig()
# Message with cache control in content blocks
messages = [
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
}
]
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in content blocks"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
def test_anthropic_vertex_other_beta_headers():
"""
Test that other beta headers are not included for Vertex AI requests.
"""
config = AnthropicConfig()
# Generate headers with multiple beta features
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=True,
computer_tool_used=True, # This should be excluded for Vertex
pdf_used=True, # This should be excluded for Vertex
is_vertex_request=True
)
# Verify that only prompt-caching is included in the beta header
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert headers["anthropic-beta"] == "prompt-caching-2024-07-31", "Only prompt-caching should be in the beta header"
assert "computer-use-2024-10-22" not in headers["anthropic-beta"], "computer-use beta should not be included"
assert "pdfs-2024-09-25" not in headers["anthropic-beta"], "pdfs beta should not be included"


@@ -39,7 +39,7 @@ async def test_request_body_caching():
result1 = await _read_request_body(mock_request)
assert result1 == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
# Verify the body was read once
mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@ async def test_request_body_caching():
# Second call should use the cached body
result2 = await _read_request_body(mock_request)
assert result2 == test_data
assert result2 == {"key": "value"}
# Verify the body was not read again
mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
# Verify the form data was correctly parsed
assert result == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (
("name", "message"),
{"name": "test_user", "message": "hello world"},
)
# Verify form() was called
mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
# Verify an empty dict is returned
assert result == {}
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == {}
assert mock_request.scope["parsed_body"] == ((), {})
# Verify the body was read
mock_request.body.assert_called_once()
@pytest.mark.asyncio
async def test_circular_reference_handling():
"""
Test that cached request body isn't modified when the returned result is modified.
Demonstrates the mutable dictionary reference issue.
"""
# Create a mock request with initial data
mock_request = MagicMock()
initial_body = {
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}],
}
mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
mock_request.headers = {"content-type": "application/json"}
mock_request.scope = {}
# First parse
result = await _read_request_body(mock_request)
# Verify initial parse
assert result["model"] == "gpt-4"
assert result["messages"] == [{"role": "user", "content": "Hello"}]
# Modify the result by adding proxy_server_request
result["proxy_server_request"] = {
"url": "http://0.0.0.0:4000/v1/chat/completions",
"method": "POST",
"headers": {"content-type": "application/json"},
"body": result, # Creates circular reference
}
# Second parse using the same request - will use the modified cached value
result2 = await _read_request_body(mock_request)
assert (
"proxy_server_request" not in result2
) # This will pass, showing the cache pollution
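The assertions above imply the parsed body is cached as an immutable (keys, values) snapshot and rebuilt on each read. A self-contained sketch of that idea (not litellm's actual helper; names are illustrative) might be:

# Hypothetical sketch: cache an immutable snapshot in the scope dict and hand back
# a fresh dict on every read, so caller mutations can't pollute the cache.
def read_request_body_cached(scope: dict, parse_fn) -> dict:
    if "parsed_body" not in scope:
        parsed = parse_fn()
        scope["parsed_body"] = (tuple(parsed.keys()), dict(parsed))
    keys, values = scope["parsed_body"]
    return {k: values[k] for k in keys}

scope: dict = {}
body1 = read_request_body_cached(scope, lambda: {"model": "gpt-4"})
body1["proxy_server_request"] = {"body": body1}  # caller adds a circular reference
body2 = read_request_body_cached(scope, lambda: {"model": "gpt-4"})
assert "proxy_server_request" not in body2  # cached snapshot stayed clean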


@@ -0,0 +1,264 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import (
DailyUserSpendTransaction,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def daily_spend_update_queue():
return DailySpendUpdateQueue()
@pytest.mark.asyncio
async def test_empty_queue_flush(daily_spend_update_queue):
"""Test flushing an empty queue returns an empty list"""
result = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert result == []
@pytest.mark.asyncio
async def test_add_single_update(daily_spend_update_queue):
"""Test adding a single update to the queue"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add update to queue
await daily_spend_update_queue.add_update({test_key: test_transaction})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 1
assert test_key in updates[0]
assert updates[0][test_key] == test_transaction
@pytest.mark.asyncio
async def test_add_multiple_updates(daily_spend_update_queue):
"""Test adding multiple updates to the queue"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key1: test_transaction1})
await daily_spend_update_queue.add_update({test_key2: test_transaction2})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 2
# Find each transaction in the list of updates
found_transaction1 = False
found_transaction2 = False
for update in updates:
if test_key1 in update:
assert update[test_key1] == test_transaction1
found_transaction1 = True
if test_key2 in update:
assert update[test_key2] == test_transaction2
found_transaction2 = True
assert found_transaction1
assert found_transaction2
@pytest.mark.asyncio
async def test_aggregated_daily_spend_update_empty(daily_spend_update_queue):
"""Test aggregating updates from an empty queue"""
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert result == {}
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_single_key():
"""Test static method for aggregating a single key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key: test_transaction}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == test_transaction
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_multiple_keys():
"""Test static method for aggregating multiple different keys"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key1: test_transaction1}, {test_key2: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 2
assert test_key1 in result
assert test_key2 in result
assert result[test_key1] == test_transaction1
assert result[test_key2] == test_transaction2
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_same_key():
"""Test static method for aggregating updates with the same key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction
@pytest.mark.asyncio
async def test_flush_and_get_aggregated_daily_spend_update_transactions(
daily_spend_update_queue,
):
"""Test the full workflow of adding, flushing, and aggregating updates"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key: test_transaction1})
await daily_spend_update_queue.add_update({test_key: test_transaction2})
# Test full workflow
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction


@@ -12,7 +12,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
# Mock Prisma client class


@@ -0,0 +1,152 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def spend_queue():
return SpendUpdateQueue()
@pytest.mark.asyncio
async def test_add_update(spend_queue):
# Test adding a single update
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
# Verify update was added by checking queue size
assert spend_queue.update_queue.qsize() == 1
@pytest.mark.asyncio
async def test_missing_response_cost(spend_queue):
# Test with missing response_cost - should default to 0
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should have created entry with 0 cost
assert aggregated["user_list_transactions"]["user123"] == 0
@pytest.mark.asyncio
async def test_missing_entity_id(spend_queue):
# Test with missing entity_id - should default to empty string
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"response_cost": 1.0,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should use empty string as key
assert aggregated["user_list_transactions"][""] == 1.0
@pytest.mark.asyncio
async def test_none_values(spend_queue):
# Test with None values
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": None, # type: ignore
"response_cost": None,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should handle None values gracefully
assert aggregated["user_list_transactions"][""] == 0
@pytest.mark.asyncio
async def test_multiple_updates_with_missing_fields(spend_queue):
# Test multiple updates with various missing fields
updates: list[SpendUpdateQueueItem] = [
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
},
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123", # missing response_cost
},
{
"entity_type": Litellm_EntityType.USER, # missing entity_id
"response_cost": 1.5,
},
]
for update in updates:
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Verify aggregation
assert (
aggregated["user_list_transactions"]["user123"] == 0.5
) # only the first update with valid cost
assert (
aggregated["user_list_transactions"][""] == 1.5
) # update with missing entity_id
@pytest.mark.asyncio
async def test_unknown_entity_type(spend_queue):
# Test with unknown entity type
update: SpendUpdateQueueItem = {
"entity_type": "UNKNOWN_TYPE", # type: ignore
"entity_id": "123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore unknown entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())
@pytest.mark.asyncio
async def test_missing_entity_type(spend_queue):
# Test with missing entity type
update: SpendUpdateQueueItem = {"entity_id": "123", "response_cost": 0.5}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore updates without entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())


@@ -55,3 +55,30 @@ async def test_ui_view_users_with_null_email(mocker, caplog):
assert response == [
LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None)
]
def test_user_daily_activity_types():
"""
Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
"""
from litellm.proxy.management_endpoints.internal_user_endpoints import (
DailySpendMetadata,
SpendMetrics,
)
# Create a SpendMetrics instance
spend_metrics = SpendMetrics()
# Create a DailySpendMetadata instance
daily_spend_metadata = DailySpendMetadata()
# Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
for field in spend_metrics.__dict__:
if field.startswith("total_"):
assert hasattr(
daily_spend_metadata, field
), f"Field {field} is not reported in DailySpendMetadata"
else:
assert not hasattr(
daily_spend_metadata, field
), f"Field {field} is reported in DailySpendMetadata"


@@ -0,0 +1,105 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import (
_get_enforced_params,
check_if_token_is_service_account,
)
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
def test_check_if_token_is_service_account():
"""
Test that only keys with `service_account_id` in metadata are considered service accounts
"""
# Test case 1: Service account token
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
assert check_if_token_is_service_account(service_account_token) == True
# Test case 2: Regular user token
regular_token = UserAPIKeyAuth(api_key="test-key", metadata={})
assert check_if_token_is_service_account(regular_token) == False
# Test case 3: Token with other metadata
other_metadata_token = UserAPIKeyAuth(
api_key="test-key", metadata={"user_id": "test-user"}
)
assert check_if_token_is_service_account(other_metadata_token) == False
def test_get_enforced_params_for_service_account_settings():
"""
Test that service account enforced params are only added to service account keys
"""
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
general_settings_with_service_account_settings = {
"service_account_settings": {"enforced_params": ["metadata.service"]},
}
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=service_account_token,
)
assert result == ["metadata.service"]
regular_token = UserAPIKeyAuth(
api_key="test-key", metadata={"enforced_params": ["user"]}
)
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=regular_token,
)
assert result == ["user"]
@pytest.mark.parametrize(
"general_settings, user_api_key_dict, expected_enforced_params",
[
(
{"enforced_params": ["param1", "param2"]},
UserAPIKeyAuth(
api_key="test_api_key", user_id="test_user_id", org_id="test_org_id"
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
user_id="test_user_id",
org_id="test_org_id",
metadata={"service_account_id": "test_service_account_id"},
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
metadata={
"enforced_params": ["param3", "param4"],
"service_account_id": "test_service_account_id",
},
),
["param1", "param2", "param3", "param4"],
),
],
)
def test_get_enforced_params(
general_settings, user_api_key_dict, expected_enforced_params
):
from litellm.proxy.litellm_pre_call_utils import _get_enforced_params
enforced_params = _get_enforced_params(general_settings, user_api_key_dict)
assert enforced_params == expected_enforced_params


@@ -199,6 +199,42 @@ class BaseLLMChatTest(ABC):
)
assert response is not None
def test_file_data_unit_test(self, pdf_messages):
from litellm.utils import supports_pdf_input, return_raw_request
from litellm.types.utils import CallTypes
from litellm.litellm_core_utils.prompt_templates.factory import convert_to_anthropic_image_obj
media_chunk = convert_to_anthropic_image_obj(
openai_image_url=pdf_messages,
format=None,
)
file_content = [
{"type": "text", "text": "What's this file about?"},
{
"type": "file",
"file": {
"file_data": pdf_messages,
}
},
]
image_messages = [{"role": "user", "content": file_content}]
base_completion_call_args = self.get_base_completion_call_args()
if not supports_pdf_input(base_completion_call_args["model"], None):
pytest.skip("Model does not support image input")
raw_request = return_raw_request(
endpoint=CallTypes.completion,
kwargs={**base_completion_call_args, "messages": image_messages},
)
print("RAW REQUEST", raw_request)
assert media_chunk["data"] in json.dumps(raw_request)
def test_message_with_name(self):
try:
litellm.set_verbose = True


@@ -268,7 +268,7 @@ async def test_vision_with_custom_model():
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABDElEQVRYhe2SzWqEMBRGPyQTfQxJsc5jBKGzFmlslyFIZxsCQ7sUaWd87EanpdpIrbtC71mE/NyTm9wEIAiCIAiC+N/otQBxU2Sf/aeh4enqptHXri+/yxIq63jlKCw6cXssnr3ObdzdGYFYCJ2IzHKXLygHXCB98Gm4DE+ZZemu5EisQSyZTmyg+AuzQbkezCuIy7EI0k9Ig3FtruwydY+qniqtV5yQyo8qpUIl2fc90KVzJWohWf2qu75vlw52rdfjVDHg8vLWwixW7PChqLkSyUadwfSS0uQZhEvRuIkS53uJvrK8cGWYaPwpGt8efvw+vlo8TPMzcmP8w7lrNypc1RsNgiAIgiD+Iu/RyDYhCaWrgQAAAABJRU5ErkJggg=="
},
},
],


@@ -1379,3 +1379,20 @@ def test_azure_modalities_param():
)
assert optional_params["modalities"] == ["text", "audio"]
assert optional_params["audio"] == {"type": "audio_input", "input": "test.wav"}
@pytest.mark.parametrize(
"model, provider",
[
("claude-3-7-sonnet-20240620-v1:0", "anthropic"),
("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
("invoke/anthropic.claude-3-7-sonnet-20240620-v1:0", "bedrock"),
("claude-3-7-sonnet@20250219", "vertex_ai"),
],
)
def test_anthropic_unified_reasoning_content(model, provider):
optional_params = get_optional_params(
model=model,
custom_llm_provider=provider,
reasoning_effort="high",
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}


@@ -1280,7 +1280,8 @@ def test_completion_cost_databricks(model):
resp = litellm.completion(model=model, messages=messages) # works fine
print(resp)
cost = completion_cost(completion_response=resp)
print(f"hidden_params: {resp._hidden_params}")
assert resp._hidden_params["response_cost"] > 0
@pytest.mark.parametrize(


@@ -62,6 +62,8 @@ from litellm.proxy._types import (
KeyRequest,
NewUserRequest,
UpdateKeyRequest,
SpendUpdateQueueItem,
Litellm_EntityType,
)
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -93,7 +95,13 @@ def prisma_client():
@pytest.mark.asyncio
async def test_batch_update_spend(prisma_client):
prisma_client.user_list_transactions["test-litellm-user-5"] = 23
await proxy_logging_obj.db_spend_update_writer.spend_update_queue.add_update(
SpendUpdateQueueItem(
entity_type=Litellm_EntityType.USER,
entity_id="test-litellm-user-5",
response_cost=23,
)
)
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
await litellm.proxy.proxy_server.prisma_client.connect()
